From 490dea45d00f01847ebebd007685d564aaf2cd98 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Nov 2008 17:06:57 +0100 Subject: itimers: remove the per-cpu-ish-ness Either we bounce once cacheline per cpu per tick, yielding n^2 bounces or we just bounce a single.. Also, using per-cpu allocations for the thread-groups complicates the per-cpu allocator in that its currently aimed to be a fixed sized allocator and the only possible extention to that would be vmap based, which is seriously constrained on 32 bit archs. So making the per-cpu memory requirement depend on the number of processes is an issue. Lastly, it didn't deal with cpu-hotplug, although admittedly that might be fixable. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/fork.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 7b8f2a78be3d..7087d8c0e5e2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -820,14 +820,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) int ret; if (clone_flags & CLONE_THREAD) { - ret = thread_group_cputime_clone_thread(current); - if (likely(!ret)) { - atomic_inc(¤t->signal->count); - atomic_inc(¤t->signal->live); - } - return ret; + atomic_inc(¤t->signal->count); + atomic_inc(¤t->signal->live); + return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); + + if (sig) + posix_cpu_timers_init_group(sig); + tsk->signal = sig; if (!sig) return -ENOMEM; @@ -864,8 +865,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); - posix_cpu_timers_init_group(sig); - acct_init_pacct(&sig->pacct); tty_audit_fork(sig); -- cgit v1.2.3 From 783adf42cf039083dd3c734c07c3bdc707e2bb15 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Sun, 11 Jan 2009 01:04:21 -0800 Subject: kernel/fork.c: unused variable 'ret' Removed the unused variable. Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- kernel/fork.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index e995899ea83f..81da4aae85cb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -817,7 +817,6 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; - int ret; if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); -- cgit v1.2.3 From 17da2bd90abf428523de0fb98f7075e00e3ed42e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:10 +0100 Subject: [CVE-2009-0029] System call wrappers part 08 Signed-off-by: Heiko Carstens --- kernel/exit.c | 7 +++---- kernel/fork.c | 2 +- kernel/futex.c | 6 +++--- kernel/module.c | 10 ++++------ kernel/sched.c | 2 +- kernel/signal.c | 18 +++++++----------- 6 files changed, 19 insertions(+), 26 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/exit.c b/kernel/exit.c index 08895df0eab3..f80dec3f1875 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1754,9 +1754,8 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t upid, - struct siginfo __user *infop, int options, - struct rusage __user *ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; @@ -1833,7 +1832,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options) +SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return sys_wait4(pid, stat_addr, options, NULL); } diff --git a/kernel/fork.c b/kernel/fork.c index 1d68f1255dd8..8eb37d38c6a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -901,7 +901,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) clear_freeze_flag(p); } -asmlinkage long sys_set_tid_address(int __user *tidptr) +SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; diff --git a/kernel/futex.c b/kernel/futex.c index 002aa189eb09..e86931d8d4e9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1978,9 +1978,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, } -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3) +SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) { struct timespec ts; ktime_t t, *tp = NULL; diff --git a/kernel/module.c b/kernel/module.c index c9332c90d5a0..e8b51d41dd72 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -743,8 +743,8 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } -asmlinkage long -sys_delete_module(const char __user *name_user, unsigned int flags) +SYSCALL_DEFINE2(delete_module, const char __user *, name_user, + unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; @@ -2296,10 +2296,8 @@ static noinline struct module *load_module(void __user *umod, } /* This is where the real work happens */ -asmlinkage long -sys_init_module(void __user *umod, - unsigned long len, - const char __user *uargs) +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) { struct module *mod; int ret = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 65c02037b052..eb1931eef587 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,7 +5869,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { struct task_struct *p; diff --git a/kernel/signal.c b/kernel/signal.c index 41f32e08615e..278cc8737f17 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2014,8 +2014,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return error; } -asmlinkage long -sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, + sigset_t __user *, oset, size_t, sigsetsize) { int error = -EINVAL; sigset_t old_set, new_set; @@ -2074,8 +2074,7 @@ out: return error; } -asmlinkage long -sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) { return do_sigpending(set, sigsetsize); } @@ -2146,11 +2145,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif -asmlinkage long -sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, + siginfo_t __user *, uinfo, const struct timespec __user *, uts, + size_t, sigsetsize) { int ret, sig; sigset_t these; @@ -2223,8 +2220,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, return ret; } -asmlinkage long -sys_kill(pid_t pid, int sig) +SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct siginfo info; -- cgit v1.2.3 From 6559eed8ca7db0531a207cd80be5e28cd6f213c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:32 +0100 Subject: [CVE-2009-0029] System call wrappers part 30 Signed-off-by: Heiko Carstens --- fs/open.c | 13 ++++++------- fs/stat.c | 12 ++++++------ fs/utimes.c | 6 ++++-- kernel/fork.c | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/open.c b/fs/open.c index bc49e3c388d9..a3a78ceb2a2b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -447,7 +447,7 @@ SYSCALL_ALIAS(sys_fallocate, SyS_fallocate); * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) +SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { const struct cred *old_cred; struct cred *override_cred; @@ -628,8 +628,7 @@ out: return err; } -asmlinkage long sys_fchmodat(int dfd, const char __user *filename, - mode_t mode) +SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) { struct path path; struct inode *inode; @@ -707,8 +706,8 @@ out: return error; } -asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, - gid_t group, int flag) +SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, + gid_t, group, int, flag) { struct path path; int error = -EINVAL; @@ -1060,8 +1059,8 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) return ret; } -asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, - int mode) +SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, + int, mode) { long ret; diff --git a/fs/stat.c b/fs/stat.c index d712a0dfb50f..2db740a0cfb5 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -260,8 +260,8 @@ SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) -asmlinkage long sys_newfstatat(int dfd, char __user *filename, - struct stat __user *statbuf, int flag) +SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, + struct stat __user *, statbuf, int, flag) { struct kstat stat; int error = -EINVAL; @@ -293,8 +293,8 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) return error; } -asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, - char __user *buf, int bufsiz) +SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, + char __user *, buf, int, bufsiz) { struct path path; int error; @@ -400,8 +400,8 @@ SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) return error; } -asmlinkage long sys_fstatat64(int dfd, char __user *filename, - struct stat64 __user *statbuf, int flag) +SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, + struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error = -EINVAL; diff --git a/fs/utimes.c b/fs/utimes.c index ee853615798a..e4c75db5d373 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -170,7 +170,8 @@ out: return error; } -asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags) +SYSCALL_DEFINE4(utimensat, int, dfd, char __user *, filename, + struct timespec __user *, utimes, int, flags) { struct timespec tstimes[2]; @@ -187,7 +188,8 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags); } -asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) +SYSCALL_DEFINE3(futimesat, int, dfd, char __user *, filename, + struct timeval __user *, utimes) { struct timeval times[2]; struct timespec tstimes[2]; diff --git a/kernel/fork.c b/kernel/fork.c index 8eb37d38c6a4..bf0cef8bbdf2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1603,7 +1603,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -asmlinkage long sys_unshare(unsigned long unshare_flags) +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; -- cgit v1.2.3 From 32bd671d6cbeda60dc73be77fa2b9037d9a9bfa0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Feb 2009 12:24:15 +0100 Subject: signal: re-add dead task accumulation stats. We're going to split the process wide cpu accounting into two parts: - clocks; which can take all the time they want since they run from user context. - timers; which need constant time tracing but can affort the overhead because they're default off -- and rare. The clock readout will go back to a full sum of the thread group, for this we need to re-add the exit stats that were removed in the initial itimer rework (f06febc9: timers: fix itimer/many thread hang). Furthermore, since that full sum can be rather slow for large thread groups and we have the complete dead task stats, revert the do_notify_parent time computation. Signed-off-by: Peter Zijlstra Reviewed-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++++- kernel/exit.c | 3 +++ kernel/fork.c | 3 ++- kernel/signal.c | 8 ++++---- 4 files changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2127e959e0f4..2e0646a30314 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -559,7 +559,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ - cputime_t cutime, cstime; + cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -567,6 +567,14 @@ struct signal_struct { unsigned long inblock, oublock, cinblock, coublock; struct task_io_accounting ioac; + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs diff --git a/kernel/exit.c b/kernel/exit.c index f80dec3f1875..efd30ccf3858 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ + sig->utime = cputime_add(sig->utime, task_utime(tsk)); + sig->stime = cputime_add(sig->stime, task_stime(tsk)); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); + sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e7721..e8e854a04ad2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->tty_old_pgrp = NULL; sig->tty = NULL; - sig->cutime = sig->cstime = cputime_zero; + sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; task_io_accounting_init(&sig->ioac); + sig->sum_sched_runtime = 0; taskstats_tgid_init(sig); task_lock(current->group_leader); diff --git a/kernel/signal.c b/kernel/signal.c index b6b36768b758..2a74fe87c0dd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; struct sighand_struct *psig; - struct task_cputime cputime; int ret = sig; BUG_ON(sig == -1); @@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - thread_group_cputime(tsk, &cputime); - info.si_utime = cputime_to_jiffies(cputime.utime); - info.si_stime = cputime_to_jiffies(cputime.stime); + info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, + tsk->signal->utime)); + info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, + tsk->signal->stime)); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) -- cgit v1.2.3 From 04ec93fe9bc98e3bd8560f79f56fed66dfae40d5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 6 Feb 2009 08:17:19 +0000 Subject: fork.c: fix NULL pointer dereference when nr_threads == threads-max I happened to forked lots of processes, and hit NULL pointer dereference. It is because in copy_process() after checking max_threads, 0 is returned but not -EAGAIN. The bug is introduced by "CRED: Detach the credentials from task_struct" (commit f1752eec6145c97163dbce62d17cf5d928e28a27). Signed-off-by: Li Zefan Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e7721..6d5dbb7a13e2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1005,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ + retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; -- cgit v1.2.3 From 06eb23b1ba39c61ee5d5faeb42a097635693e370 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 9 Feb 2009 02:02:33 +0100 Subject: ptrace, x86: fix the usage of ptrace_fork() I noticed by pure accident we have ptrace_fork() and friends. This was added by "x86, bts: add fork and exit handling", commit bf53de907dfdaac178c92d774aae7370d7b97d20. I can't test this, ds_request_bts() returns -EOPNOTSUPP, but I strongly believe this needs the fix. I think something like this program int main(void) { int pid = fork(); if (!pid) { ptrace(PTRACE_TRACEME, 0, NULL, NULL); kill(getpid(), SIGSTOP); fork(); } else { struct ptrace_bts_config bts = { .flags = PTRACE_BTS_O_ALLOC, .size = 4 * 4096, }; wait(NULL); ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACEFORK); ptrace(PTRACE_BTS_CONFIG, pid, &bts, sizeof(bts)); ptrace(PTRACE_CONT, pid, NULL, NULL); sleep(1); } return 0; } should crash the kernel. If the task is traced by its natural parent ptrace_reparented() returns 0 but we should clear ->btsxxx anyway. Signed-off-by: Oleg Nesterov Acked-by: Markus Metzger Signed-off-by: Ingo Molnar --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e7721..43c039d55e95 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1093,7 +1093,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(ptrace_reparented(current))) + if (unlikely(current->ptrace)) ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ -- cgit v1.2.3