From 6e006701ccc1590500186ef21e074bd900c5dd67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 20 Jan 2010 22:27:56 +0200 Subject: dnotify: move dir_notify_enable declaration Move dir_notify_enable declaration to where it belongs -- dnotify.h . Signed-off-by: Alexey Dobriyan Signed-off-by: Eric Paris --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d24f761f4876..7b983dbfe0ec 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From d14f1729483fad3a8817fbbcbd017678b7d1ad26 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 25 Feb 2010 20:28:57 -0500 Subject: sysctl extern cleanup: inotify Extern declarations in sysctl.c should be move to their own head file, and then include them in relavant .c files. Move inotify_table extern declaration to linux/inotify.h Signed-off-by: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Eric Paris --- include/linux/inotify.h | 5 +++++ kernel/sysctl.c | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 959a38b8f75d..94d209a1b689 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -69,4 +69,9 @@ struct inotify_event { #define IN_CLOEXEC O_CLOEXEC #define IN_NONBLOCK O_NONBLOCK +#ifdef __KERNEL__ +#include +extern struct ctl_table inotify_table[]; /* for sysctl */ +#endif + #endif /* _LINUX_INOTIFY_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7b983dbfe0ec..fe30db7bdb0a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -131,6 +131,9 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; +#ifdef CONFIG_INOTIFY_USER +#include +#endif #ifdef CONFIG_SPARC #include #endif @@ -207,9 +210,6 @@ static struct ctl_table fs_table[]; static struct ctl_table debug_table[]; static struct ctl_table dev_table[]; extern struct ctl_table random_table[]; -#ifdef CONFIG_INOTIFY_USER -extern struct ctl_table inotify_table[]; -#endif #ifdef CONFIG_EPOLL extern struct ctl_table epoll_table[]; #endif -- cgit v1.2.3 From b3bd3de66f60df4c9a2076e2886a622458929056 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 10 Aug 2010 14:17:51 -0700 Subject: gcc-4.6: kernel/*: Fix unused but set warnings No real bugs I believe, just some dead code. Signed-off-by: Andi Kleen Cc: Peter Zijlstra Cc: andi@firstfloor.org Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/debug/kdb/kdb_bp.c | 2 -- kernel/hrtimer.c | 3 +-- kernel/sched_fair.c | 3 +-- kernel/sysctl.c | 5 +---- kernel/trace/ring_buffer.c | 2 -- 5 files changed, 3 insertions(+), 12 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 75bd9b3ebbb7..20059ef4459a 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -274,7 +274,6 @@ static int kdb_bp(int argc, const char **argv) int i, bpno; kdb_bp_t *bp, *bp_check; int diag; - int free; char *symname = NULL; long offset = 0ul; int nextarg; @@ -305,7 +304,6 @@ static int kdb_bp(int argc, const char **argv) /* * Find an empty bp structure to allocate */ - free = KDB_MAXBPT; for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) { if (bp->bp_free) break; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ce669174f355..1decafbb6b1a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1091,11 +1091,10 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); */ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { - struct hrtimer_clock_base *base; unsigned long flags; ktime_t rem; - base = lock_hrtimer_base(timer, &flags); + lock_hrtimer_base(timer, &flags); rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ab661ebc4895..134f7edb30c6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1313,7 +1313,7 @@ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int load_idx) { - struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; + struct sched_group *idlest = NULL, *group = sd->groups; unsigned long min_load = ULONG_MAX, this_load = 0; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -1348,7 +1348,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, if (local_group) { this_load = avg_load; - this = group; } else if (avg_load < min_load) { min_load = avg_load; idlest = group; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ca38e8e3e907..f88552c6d227 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1713,10 +1713,7 @@ static __init int sysctl_init(void) { sysctl_set_parent(NULL, root_table); #ifdef CONFIG_SYSCTL_SYSCALL_CHECK - { - int err; - err = sysctl_check_table(current->nsproxy, root_table); - } + sysctl_check_table(current->nsproxy, root_table); #endif return 0; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 19cccc3c3028..492197e2f86c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2985,13 +2985,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) static void rb_advance_iter(struct ring_buffer_iter *iter) { - struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; unsigned length; cpu_buffer = iter->cpu_buffer; - buffer = cpu_buffer->buffer; /* * Check if we are at the end of the buffer. -- cgit v1.2.3 From 27b3d80a7b6adcf069b5e869e4efcc3a79f88a91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Oct 2010 12:59:29 -0700 Subject: sysctl: fix min/max handling in __do_proc_doulongvec_minmax() When proc_doulongvec_minmax() is used with an array of longs, and no min/max check requested (.extra1 or .extra2 being NULL), we dereference a NULL pointer for the second element of the array. Noticed while doing some changes in network stack for the "16TB problem" Fix is to not change min & max pointers in __do_proc_doulongvec_minmax(), so that all elements of the vector share an unique min/max limit, like proc_dointvec_minmax(). [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Eric Dumazet Cc: "Eric W. Biederman" Cc: Americo Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f88552c6d227..3a45c224770f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2485,7 +2485,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int kbuf[left] = 0; } - for (; left && vleft--; i++, min++, max++, first=0) { + for (; left && vleft--; i++, first = 0) { unsigned long val; if (write) { -- cgit v1.2.3 From 7e360c38abe2c70eae3ba5a8a17f17671d8b77c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 09:32:55 +0200 Subject: fs: allow for more than 2^31 files Andrew, Could you please review this patch, you probably are the right guy to take it, because it crosses fs and net trees. Note : /proc/sys/fs/file-nr is a read-only file, so this patch doesnt depend on previous patch (sysctl: fix min/max handling in __do_proc_doulongvec_minmax()) Thanks ! [PATCH V4] fs: allow for more than 2^31 files Robin Holt tried to boot a 16TB system and found af_unix was overflowing a 32bit value : We were seeing a failure which prevented boot. The kernel was incapable of creating either a named pipe or unix domain socket. This comes down to a common kernel function called unix_create1() which does: atomic_inc(&unix_nr_socks); if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) goto out; The function get_max_files() is a simple return of files_stat.max_files. files_stat.max_files is a signed integer and is computed in fs/file_table.c's files_init(). n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = n; In our case, mempages (total_ram_pages) is approx 3,758,096,384 (0xe0000000). That leaves max_files at approximately 1,503,238,553. This causes 2 * get_max_files() to integer overflow. Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long integers, and change af_unix to use an atomic_long_t instead of atomic_t. get_max_files() is changed to return an unsigned long. get_nr_files() is changed to return a long. unix_nr_socks is changed from atomic_t to atomic_long_t, while not strictly needed to address Robin problem. Before patch (on a 64bit kernel) : # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max -18446744071562067968 After patch: # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max 2147483648 # cat /proc/sys/fs/file-nr 704 0 2147483648 Reported-by: Robin Holt Signed-off-by: Eric Dumazet Acked-by: David Miller Reviewed-by: Robin Holt Tested-by: Robin Holt Signed-off-by: Al Viro --- fs/file_table.c | 17 +++++++---------- include/linux/fs.h | 8 ++++---- kernel/sysctl.c | 6 +++--- net/unix/af_unix.c | 14 +++++++------- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/file_table.c b/fs/file_table.c index a04bdd81c11c..c3dee381f1b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -60,7 +60,7 @@ static inline void file_free(struct file *f) /* * Return the total number of open files in the system */ -static int get_nr_files(void) +static long get_nr_files(void) { return percpu_counter_read_positive(&nr_files); } @@ -68,7 +68,7 @@ static int get_nr_files(void) /* * Return the maximum number of open files in the system */ -int get_max_files(void) +unsigned long get_max_files(void) { return files_stat.max_files; } @@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); - return proc_dointvec(table, write, buffer, lenp, ppos); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #else int proc_nr_files(ctl_table *table, int write, @@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *get_empty_filp(void) { const struct cred *cred = current_cred(); - static int old_max; + static long old_max; struct file * f; /* @@ -140,8 +140,7 @@ struct file *get_empty_filp(void) over: /* Ran out of filps - report that */ if (get_nr_files() > old_max) { - printk(KERN_INFO "VFS: file-max limit %d reached\n", - get_max_files()); + pr_info("VFS: file-max limit %lu reached\n", get_max_files()); old_max = get_nr_files(); } goto fail; @@ -487,7 +486,7 @@ retry: void __init files_init(unsigned long mempages) { - int n; + unsigned long n; filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages) */ n = (mempages * (PAGE_SIZE / 1024)) / 10; - files_stat.max_files = n; - if (files_stat.max_files < NR_FILE) - files_stat.max_files = NR_FILE; + files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0a5d83633884..0cd6821013a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -34,9 +34,9 @@ /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ }; struct inodes_stat_t { @@ -400,7 +400,7 @@ extern void __init inode_init_early(void); extern void __init files_init(unsigned long); extern struct files_stat_struct files_stat; -extern int get_max_files(void); +extern unsigned long get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a45c224770f..694b140852c2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1352,16 +1352,16 @@ static struct ctl_table fs_table[] = { { .procname = "file-nr", .data = &files_stat, - .maxlen = 3*sizeof(int), + .maxlen = sizeof(files_stat), .mode = 0444, .proc_handler = proc_nr_files, }, { .procname = "file-max", .data = &files_stat.max_files, - .maxlen = sizeof(int), + .maxlen = sizeof(files_stat.max_files), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "nr_open", diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0ebc777a6660..3c95304a0817 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -117,7 +117,7 @@ static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; static DEFINE_SPINLOCK(unix_table_lock); -static atomic_t unix_nr_socks = ATOMIC_INIT(0); +static atomic_long_t unix_nr_socks; #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk) if (u->addr) unix_release_addr(u->addr); - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, - atomic_read(&unix_nr_socks)); + printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, + atomic_long_read(&unix_nr_socks)); #endif } @@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - atomic_inc(&unix_nr_socks); - if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) + atomic_long_inc(&unix_nr_socks); + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); @@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) unix_insert_socket(unix_sockets_unbound, sk); out: if (sk == NULL) - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); else { local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); -- cgit v1.2.3 From cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 23 Oct 2010 05:03:02 -0400 Subject: fs: Convert nr_inodes and nr_unused to per-cpu counters The number of inodes allocated does not need to be tied to the addition or removal of an inode to/from a list. If we are not tied to a list lock, we could update the counters when inodes are initialised or destroyed, but to do that we need to convert the counters to be per-cpu (i.e. independent of a lock). This means that we have the freedom to change the list/locking implementation without needing to care about the counters. Based on a patch originally from Eric Dumazet. [AV: cleaned up a bit, fixed build breakage on weird configs Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fs-writeback.c | 5 ++--- fs/inode.c | 64 ++++++++++++++++++++++++++++++++++++++---------------- fs/internal.h | 1 + include/linux/fs.h | 3 ++- kernel/sysctl.c | 4 ++-- 5 files changed, 52 insertions(+), 25 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 39f44f2e709a..f04d04af84f2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) wb->last_old_flush = jiffies; nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + get_nr_dirty_inodes(); if (nr_pages) { struct wb_writeback_work work = { @@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb) WARN_ON(!rwsem_is_locked(&sb->s_umount)); - work.nr_pages = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes(); bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); diff --git a/fs/inode.c b/fs/inode.c index 4440cf1034ec..0d5aeccbdd90 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; +static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; +static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; + static struct kmem_cache *inode_cachep __read_mostly; +static inline int get_nr_inodes(void) +{ + return percpu_counter_sum_positive(&nr_inodes); +} + +static inline int get_nr_inodes_unused(void) +{ + return percpu_counter_sum_positive(&nr_inodes_unused); +} + +int get_nr_dirty_inodes(void) +{ + int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + return nr_dirty > 0 ? nr_dirty : 0; + +} + +/* + * Handle nr_inode sysctl + */ +#ifdef CONFIG_SYSCTL +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void wake_up_inode(struct inode *inode) { /* @@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif + percpu_counter_inc(&nr_inodes); + return 0; out: return -ENOMEM; @@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif + percpu_counter_dec(&nr_inodes); } EXPORT_SYMBOL(__destroy_inode); @@ -286,7 +322,7 @@ void __iget(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_in_use); - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); } void end_writeback(struct inode *inode) @@ -327,8 +363,6 @@ static void evict(struct inode *inode) */ static void dispose_list(struct list_head *head) { - int nr_disposed = 0; - while (!list_empty(head)) { struct inode *inode; @@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head) wake_up_inode(inode); destroy_inode(inode); - nr_disposed++; } - spin_lock(&inode_lock); - inodes_stat.nr_inodes -= nr_disposed; - spin_unlock(&inode_lock); } /* @@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head) static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; - int busy = 0, count = 0; + int busy = 0; next = head->next; for (;;) { @@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - count++; + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; } - /* only unused inodes may be cached with i_count zero */ - inodes_stat.nr_unused -= count; return busy; } @@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode) static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); - int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; @@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - nr_pruned++; + percpu_counter_dec(&nr_inodes_unused); } - inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else @@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) return -1; prune_icache(nr); } - return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -594,7 +620,6 @@ static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) @@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode) if (!drop) { if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; + percpu_counter_inc(&nr_inodes_unused); if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); return; @@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; spin_unlock(&inode_lock); evict(inode); spin_lock(&inode_lock); @@ -1502,6 +1526,8 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); + percpu_counter_init(&nr_inodes, 0); + percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) diff --git a/fs/internal.h b/fs/internal.h index f6dce46d80dc..4cc67eb6ed56 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -105,4 +105,5 @@ extern void release_open_intent(struct nameidata *); /* * inode.c */ +extern int get_nr_dirty_inodes(void); extern int invalidate_inodes(struct super_block *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78043da85e1f..a3937a8ee95e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); - +int proc_nr_inodes(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 694b140852c2..99a510cbfbb3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = { .data = &inodes_stat, .maxlen = 2*sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, .maxlen = 7*sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "file-nr", -- cgit v1.2.3 From 312d3ca856d369bb04d0443846b85b4cdde6fa8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 Oct 2010 05:36:23 -0400 Subject: fs: use percpu counter for nr_dentry and nr_dentry_unused The nr_dentry stat is a globally touched cacheline and atomic operation twice over the lifetime of a dentry. It is used for the benfit of userspace only. Turn it into a per-cpu counter and always decrement it in d_free instead of doing various batching operations to reduce lock hold times in the callers. Based on an earlier patch from Nick Piggin . Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 51 ++++++++++++++++++++++++++++++++------------------- include/linux/fs.h | 2 ++ kernel/sysctl.c | 2 +- 3 files changed, 35 insertions(+), 20 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/dcache.c b/fs/dcache.c index 028753951e95..c37a656802b0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -67,6 +67,19 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; +static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; +static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; + +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); + dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); @@ -78,13 +91,14 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry - * inside dcache_lock. + * no dcache_lock, please. */ static void d_free(struct dentry *dentry) { + percpu_counter_dec(&nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); + /* if dentry was never inserted into hash, immediate free is OK */ if (hlist_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); @@ -125,14 +139,14 @@ static void dentry_lru_add(struct dentry *dentry) { list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + percpu_counter_inc(&nr_dentry_unused); } static void dentry_lru_add_tail(struct dentry *dentry) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; + percpu_counter_inc(&nr_dentry_unused); } static void dentry_lru_del(struct dentry *dentry) @@ -140,7 +154,7 @@ static void dentry_lru_del(struct dentry *dentry) if (!list_empty(&dentry->d_lru)) { list_del(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + percpu_counter_dec(&nr_dentry_unused); } } @@ -149,7 +163,7 @@ static void dentry_lru_del_init(struct dentry *dentry) if (likely(!list_empty(&dentry->d_lru))) { list_del_init(&dentry->d_lru); dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + percpu_counter_dec(&nr_dentry_unused); } } @@ -168,7 +182,6 @@ static struct dentry *d_kill(struct dentry *dentry) struct dentry *parent; list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); if (IS_ROOT(dentry)) @@ -314,7 +327,6 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This should be called _only_ with dcache_lock held */ - static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); @@ -534,7 +546,7 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = dentry_stat.nr_unused; + int unused = percpu_counter_sum_positive(&nr_dentry_unused); int prune_ratio; int pruned; @@ -699,20 +711,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) * otherwise we ascend to the parent and move to the * next sibling if there is one */ if (!parent) - goto out; - + return; dentry = parent; - } while (list_empty(&dentry->d_subdirs)); dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child); } -out: - /* several dentries were freed, need to correct nr_dentry */ - spin_lock(&dcache_lock); - dentry_stat.nr_dentry -= detached; - spin_unlock(&dcache_lock); } /* @@ -896,12 +901,16 @@ EXPORT_SYMBOL(shrink_dcache_parent); */ static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { + int nr_unused; + if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; prune_dcache(nr); } - return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + + nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); + return (nr_unused / 100) * sysctl_vfs_cache_pressure; } static struct shrinker dcache_shrinker = { @@ -968,9 +977,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) spin_lock(&dcache_lock); if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - dentry_stat.nr_dentry++; spin_unlock(&dcache_lock); + percpu_counter_inc(&nr_dentry); + return dentry; } EXPORT_SYMBOL(d_alloc); @@ -2417,6 +2427,9 @@ static void __init dcache_init(void) { int loop; + percpu_counter_init(&nr_dentry, 0); + percpu_counter_init(&nr_dentry_unused, 0); + /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a573cf13f51..d58059944801 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2490,6 +2490,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int proc_nr_dentry(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); int proc_nr_inodes(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99a510cbfbb3..8b77ff5c502c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1377,7 +1377,7 @@ static struct ctl_table fs_table[] = { .data = &dentry_stat, .maxlen = 6*sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_dentry, }, { .procname = "overflowuid", -- cgit v1.2.3 From 518de9b39e854542de59bfb8b9f61c8f7ecf808b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2010 14:22:44 -0700 Subject: fs: allow for more than 2^31 files Robin Holt tried to boot a 16TB system and found af_unix was overflowing a 32bit value : We were seeing a failure which prevented boot. The kernel was incapable of creating either a named pipe or unix domain socket. This comes down to a common kernel function called unix_create1() which does: atomic_inc(&unix_nr_socks); if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) goto out; The function get_max_files() is a simple return of files_stat.max_files. files_stat.max_files is a signed integer and is computed in fs/file_table.c's files_init(). n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = n; In our case, mempages (total_ram_pages) is approx 3,758,096,384 (0xe0000000). That leaves max_files at approximately 1,503,238,553. This causes 2 * get_max_files() to integer overflow. Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long integers, and change af_unix to use an atomic_long_t instead of atomic_t. get_max_files() is changed to return an unsigned long. get_nr_files() is changed to return a long. unix_nr_socks is changed from atomic_t to atomic_long_t, while not strictly needed to address Robin problem. Before patch (on a 64bit kernel) : # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max -18446744071562067968 After patch: # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max 2147483648 # cat /proc/sys/fs/file-nr 704 0 2147483648 Reported-by: Robin Holt Signed-off-by: Eric Dumazet Acked-by: David Miller Reviewed-by: Robin Holt Tested-by: Robin Holt Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file_table.c | 17 +++++++---------- include/linux/fs.h | 8 ++++---- kernel/sysctl.c | 6 +++--- net/unix/af_unix.c | 14 +++++++------- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/file_table.c b/fs/file_table.c index a04bdd81c11c..c3dee381f1b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -60,7 +60,7 @@ static inline void file_free(struct file *f) /* * Return the total number of open files in the system */ -static int get_nr_files(void) +static long get_nr_files(void) { return percpu_counter_read_positive(&nr_files); } @@ -68,7 +68,7 @@ static int get_nr_files(void) /* * Return the maximum number of open files in the system */ -int get_max_files(void) +unsigned long get_max_files(void) { return files_stat.max_files; } @@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); - return proc_dointvec(table, write, buffer, lenp, ppos); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #else int proc_nr_files(ctl_table *table, int write, @@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *get_empty_filp(void) { const struct cred *cred = current_cred(); - static int old_max; + static long old_max; struct file * f; /* @@ -140,8 +140,7 @@ struct file *get_empty_filp(void) over: /* Ran out of filps - report that */ if (get_nr_files() > old_max) { - printk(KERN_INFO "VFS: file-max limit %d reached\n", - get_max_files()); + pr_info("VFS: file-max limit %lu reached\n", get_max_files()); old_max = get_nr_files(); } goto fail; @@ -487,7 +486,7 @@ retry: void __init files_init(unsigned long mempages) { - int n; + unsigned long n; filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages) */ n = (mempages * (PAGE_SIZE / 1024)) / 10; - files_stat.max_files = n; - if (files_stat.max_files < NR_FILE) - files_stat.max_files = NR_FILE; + files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f34ff6e5558..b2cdb6bc8287 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -34,9 +34,9 @@ /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ }; struct inodes_stat_t { @@ -400,7 +400,7 @@ extern void __init inode_init_early(void); extern void __init files_init(unsigned long); extern struct files_stat_struct files_stat; -extern int get_max_files(void); +extern unsigned long get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a45c224770f..694b140852c2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1352,16 +1352,16 @@ static struct ctl_table fs_table[] = { { .procname = "file-nr", .data = &files_stat, - .maxlen = 3*sizeof(int), + .maxlen = sizeof(files_stat), .mode = 0444, .proc_handler = proc_nr_files, }, { .procname = "file-max", .data = &files_stat.max_files, - .maxlen = sizeof(int), + .maxlen = sizeof(files_stat.max_files), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "nr_open", diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0ebc777a6660..3c95304a0817 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -117,7 +117,7 @@ static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; static DEFINE_SPINLOCK(unix_table_lock); -static atomic_t unix_nr_socks = ATOMIC_INIT(0); +static atomic_long_t unix_nr_socks; #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk) if (u->addr) unix_release_addr(u->addr); - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, - atomic_read(&unix_nr_socks)); + printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, + atomic_long_read(&unix_nr_socks)); #endif } @@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - atomic_inc(&unix_nr_socks); - if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) + atomic_long_inc(&unix_nr_socks); + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); @@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) unix_insert_socket(unix_sockets_unbound, sk); out: if (sk == NULL) - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); else { local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); -- cgit v1.2.3 From f5d87d851d76a390d0fab2f77bd1d563d69ee586 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:49 -0700 Subject: printk: declare printk_ratelimit_state in ratelimit.h Adding declaration of printk_ratelimit_state in ratelimit.h removes potential build breakage and following sparse warning: kernel/printk.c:1426:1: warning: symbol 'printk_ratelimit_state' was not declared. Should it be static? [akpm@linux-foundation.org: remove unneeded ifdef] Signed-off-by: Namhyung Kim Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ratelimit.h | 2 ++ kernel/sysctl.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 8f69d09a41a5..03ff67b0cdf5 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -36,6 +36,8 @@ static inline void ratelimit_state_init(struct ratelimit_state *rs, rs->begin = 0; } +extern struct ratelimit_state printk_ratelimit_state; + extern int ___ratelimit(struct ratelimit_state *rs, const char *func); #define __ratelimit(state) ___ratelimit(state, __func__) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 694b140852c2..48d9d689498f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -161,8 +161,6 @@ extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif -extern struct ratelimit_state printk_ratelimit_state; - #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From eaf06b241b091357e72b76863ba16e89610d31bd Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Thu, 11 Nov 2010 14:05:18 -0800 Subject: Restrict unprivileged access to kernel syslog The kernel syslog contains debugging information that is often useful during exploitation of other vulnerabilities, such as kernel heap addresses. Rather than futilely attempt to sanitize hundreds (or thousands) of printk statements and simultaneously cripple useful debugging functionality, it is far simpler to create an option that prevents unprivileged users from reading the syslog. This patch, loosely based on grsecurity's GRKERNSEC_DMESG, creates the dmesg_restrict sysctl. When set to "0", the default, no restrictions are enforced. When set to "1", only users with CAP_SYS_ADMIN can read the kernel syslog via dmesg(8) or other mechanisms. [akpm@linux-foundation.org: explain the config option in kernel.txt] Signed-off-by: Dan Rosenberg Acked-by: Ingo Molnar Acked-by: Eugene Teo Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 14 ++++++++++++++ include/linux/kernel.h | 1 + kernel/printk.c | 6 ++++++ kernel/sysctl.c | 9 +++++++++ security/Kconfig | 12 ++++++++++++ security/commoncap.c | 2 ++ 6 files changed, 44 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 3894eaa23486..209e1584c3dc 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -28,6 +28,7 @@ show up in /proc/sys/kernel: - core_uses_pid - ctrl-alt-del - dentry-state +- dmesg_restrict - domainname - hostname - hotplug @@ -213,6 +214,19 @@ to decide what to do with it. ============================================================== +dmesg_restrict: + +This toggle indicates whether unprivileged users are prevented from using +dmesg(8) to view messages from the kernel's log buffer. When +dmesg_restrict is set to (0) there are no restrictions. When +dmesg_restrict is set set to (1), users must have CAP_SYS_ADMIN to use +dmesg(8). + +The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default +value of dmesg_restrict. + +============================================================== + domainname & hostname: These files can be used to set the NIS/YP domainname and the diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b526947bdf48..fc3da9e4da19 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -293,6 +293,7 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; +extern int dmesg_restrict; /* * Print a one-time message (analogous to WARN_ONCE() et al): diff --git a/kernel/printk.c b/kernel/printk.c index b2ebaee8c377..38e7d5868d60 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -261,6 +261,12 @@ static inline void boot_delay_msec(void) } #endif +#ifdef CONFIG_SECURITY_DMESG_RESTRICT +int dmesg_restrict = 1; +#else +int dmesg_restrict; +#endif + int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c33a1edb799f..b65bf634035e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -703,6 +703,15 @@ static struct ctl_table kern_table[] = { .extra2 = &ten_thousand, }, #endif + { + .procname = "dmesg_restrict", + .data = &dmesg_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { .procname = "ngroups_max", .data = &ngroups_max, diff --git a/security/Kconfig b/security/Kconfig index bd72ae623494..e80da955e687 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -39,6 +39,18 @@ config KEYS_DEBUG_PROC_KEYS If you are unsure as to whether this is required, answer N. +config SECURITY_DMESG_RESTRICT + bool "Restrict unprivileged access to the kernel syslog" + default n + help + This enforces restrictions on unprivileged users reading the kernel + syslog via dmesg(8). + + If this option is not selected, no restrictions will be enforced + unless the dmesg_restrict sysctl is explicitly set to (1). + + If you are unsure how to answer this question, answer N. + config SECURITY bool "Enable different security models" depends on SYSFS diff --git a/security/commoncap.c b/security/commoncap.c index 5e632b4857e4..04b80f9912bf 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -895,6 +895,8 @@ int cap_syslog(int type, bool from_file) { if (type != SYSLOG_ACTION_OPEN && from_file) return 0; + if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) + return -EPERM; if ((type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN)) return -EPERM; -- cgit v1.2.3 From df6e61d4ca268dc8706db38222fde9f04701566c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 15 Nov 2010 21:17:27 -0800 Subject: kernel/sysctl.c: Fix build failure with !CONFIG_PRINTK Sigh... Signed-off-by: Joe Perches Acked-by: Eric Paris Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035e..5abfa1518554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, -#endif { .procname = "dmesg_restrict", .data = &dmesg_restrict, @@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#endif { .procname = "ngroups_max", .data = &ngroups_max, -- cgit v1.2.3