aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-07-20 15:05:03 -0700
committerJakub Kicinski <kuba@kernel.org>2023-07-20 15:52:55 -0700
commit59be3baa8dff271d48500e009622318badfc7140 (patch)
treed7fde1af4197df9ece8ca26fb492051df2ef80f1 /kernel
parenttcp: add TCP_OLD_SEQUENCE drop reason (diff)
parentMerge tag 'net-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netd... (diff)
downloadlinux-59be3baa8dff271d48500e009622318badfc7140.tar.gz
linux-59be3baa8dff271d48500e009622318badfc7140.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. No conflicts or adjacent changes. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c32
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/kallsyms.c5
-rw-r--r--kernel/power/hibernate.c1
-rw-r--r--kernel/power/qos.c9
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/psi.c29
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/trace/fprobe.c6
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/trace/trace_probe_kernel.h30
-rw-r--r--kernel/trace/trace_probe_tmpl.h10
-rw-r--r--kernel/trace/trace_uprobe.c3
14 files changed, 87 insertions, 56 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 803b91135ca0..71473c19093d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5587,16 +5587,17 @@ static int update_stack_depth(struct bpf_verifier_env *env,
* Since recursion is prevented by check_cfg() this algorithm
* only needs a local stack of MAX_CALL_FRAMES to remember callsites
*/
-static int check_max_stack_depth(struct bpf_verifier_env *env)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
{
- int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
+ int depth = 0, frame = 0, i, subprog_end;
bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
int j;
+ i = subprog[idx].start;
process_func:
/* protect against potential stack overflow that might happen when
* bpf2bpf calls get combined with tailcalls. Limit the caller's stack
@@ -5635,7 +5636,7 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
- int next_insn;
+ int next_insn, sidx;
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
@@ -5645,14 +5646,14 @@ continue_func:
/* find the callee */
next_insn = i + insn[i].imm + 1;
- idx = find_subprog(env, next_insn);
- if (idx < 0) {
+ sidx = find_subprog(env, next_insn);
+ if (sidx < 0) {
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
next_insn);
return -EFAULT;
}
- if (subprog[idx].is_async_cb) {
- if (subprog[idx].has_tail_call) {
+ if (subprog[sidx].is_async_cb) {
+ if (subprog[sidx].has_tail_call) {
verbose(env, "verifier bug. subprog has tail_call and async cb\n");
return -EFAULT;
}
@@ -5661,6 +5662,7 @@ continue_func:
continue;
}
i = next_insn;
+ idx = sidx;
if (subprog[idx].has_tail_call)
tail_call_reachable = true;
@@ -5696,6 +5698,22 @@ continue_func:
goto continue_func;
}
+static int check_max_stack_depth(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *si = env->subprog_info;
+ int ret;
+
+ for (int i = 0; i < env->subprog_cnt; i++) {
+ if (!i || si[i].is_async_cb) {
+ ret = check_max_stack_depth_subprog(env, i);
+ if (ret < 0)
+ return ret;
+ }
+ continue;
+ }
+ return 0;
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
static int get_callee_stack_depth(struct bpf_verifier_env *env,
const struct bpf_insn *insn, int idx)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bfe3cd8ccf36..f55a40db065f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3730,7 +3730,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_psi(cgrp);
- new = psi_trigger_create(psi, buf, res, of->file);
+ new = psi_trigger_create(psi, buf, res, of->file, of);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 7982cc9d497c..016d997131d4 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -174,11 +174,10 @@ static bool cleanup_symbol_name(char *s)
* LLVM appends various suffixes for local functions and variables that
* must be promoted to global scope as part of LTO. This can break
* hooking of static functions with kprobes. '.' is not a valid
- * character in an identifier in C. Suffixes observed:
+ * character in an identifier in C. Suffixes only in LLVM LTO observed:
* - foo.llvm.[0-9a-f]+
- * - foo.[0-9a-f]+
*/
- res = strchr(s, '.');
+ res = strstr(s, ".llvm.");
if (res) {
*res = '\0';
return true;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f62e89d0d906..e1b4bfa938dd 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -1179,6 +1179,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
unsigned maj, min, offset;
char *p, dummy;
+ error = 0;
if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset,
&dummy) == 3) {
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index af51ed6d45ef..782d3b41c1f3 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -426,6 +426,11 @@ late_initcall(cpu_latency_qos_init);
/* Definitions related to the frequency QoS below. */
+static inline bool freq_qos_value_invalid(s32 value)
+{
+ return value < 0 && value != PM_QOS_DEFAULT_VALUE;
+}
+
/**
* freq_constraints_init - Initialize frequency QoS constraints.
* @qos: Frequency QoS constraints to initialize.
@@ -531,7 +536,7 @@ int freq_qos_add_request(struct freq_constraints *qos,
{
int ret;
- if (IS_ERR_OR_NULL(qos) || !req || value < 0)
+ if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value))
return -EINVAL;
if (WARN(freq_qos_request_active(req),
@@ -563,7 +568,7 @@ EXPORT_SYMBOL_GPL(freq_qos_add_request);
*/
int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
{
- if (!req || new_value < 0)
+ if (!req || freq_qos_value_invalid(new_value))
return -EINVAL;
if (WARN(!freq_qos_request_active(req),
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a80a73909dc2..b3e25be58e2b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7174,7 +7174,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
- cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+ cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
return recent_used_cpu;
}
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 81fca77397f6..9bb3f2b3ccfc 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -493,8 +493,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
continue;
/* Generate an event */
- if (cmpxchg(&t->event, 0, 1) == 0)
- wake_up_interruptible(&t->event_wait);
+ if (cmpxchg(&t->event, 0, 1) == 0) {
+ if (t->of)
+ kernfs_notify(t->of->kn);
+ else
+ wake_up_interruptible(&t->event_wait);
+ }
t->last_event_time = now;
/* Reset threshold breach flag once event got generated */
t->pending_event = false;
@@ -1271,8 +1275,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
return 0;
}
-struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, enum psi_res res, struct file *file)
+struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
+ enum psi_res res, struct file *file,
+ struct kernfs_open_file *of)
{
struct psi_trigger *t;
enum psi_states state;
@@ -1331,7 +1336,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
- init_waitqueue_head(&t->event_wait);
+ t->of = of;
+ if (!of)
+ init_waitqueue_head(&t->event_wait);
t->pending_event = false;
t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
@@ -1388,7 +1395,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
* being accessed later. Can happen if cgroup is deleted from under a
* polling process.
*/
- wake_up_pollfree(&t->event_wait);
+ if (t->of)
+ kernfs_notify(t->of->kn);
+ else
+ wake_up_interruptible(&t->event_wait);
if (t->aggregator == PSI_AVGS) {
mutex_lock(&group->avgs_lock);
@@ -1465,7 +1475,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- poll_wait(file, &t->event_wait, wait);
+ if (t->of)
+ kernfs_generic_poll(t->of, wait);
+ else
+ poll_wait(file, &t->event_wait, wait);
if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;
@@ -1535,7 +1548,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
return -EBUSY;
}
- new = psi_trigger_create(&psi_system, buf, res, file);
+ new = psi_trigger_create(&psi_system, buf, res, file, NULL);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
diff --git a/kernel/sys.c b/kernel/sys.c
index 05f838929e72..2410e3999ebe 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2535,11 +2535,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
return -EINVAL;
break;
- case PR_GET_AUXV:
- if (arg4 || arg5)
- return -EINVAL;
- error = prctl_get_auxv((void __user *)arg2, arg3);
- break;
default:
return -EINVAL;
}
@@ -2694,6 +2689,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_AUXV:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = prctl_get_auxv((void __user *)arg2, arg3);
+ break;
#ifdef CONFIG_KSM
case PR_SET_MEMORY_MERGE:
if (arg3 || arg4 || arg5)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index b70de44e6d3d..3b21f4063258 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -100,6 +100,12 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
return;
}
+ /*
+ * This user handler is shared with other kprobes and is not expected to be
+ * called recursively. So if any other kprobe handler is running, this will
+ * exit as kprobe does. See the section 'Share the callbacks with kprobes'
+ * in Documentation/trace/fprobe.rst for more information.
+ */
if (unlikely(kprobe_running())) {
fp->nmissed++;
goto recursion_unlock;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ed7906b13f09..e1edc2197fc8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -113,6 +113,8 @@ enum trace_type {
#define MEM_FAIL(condition, fmt, ...) \
DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
+#define FAULT_STRING "(fault)"
+
#define HIST_STACKTRACE_DEPTH 16
#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
#define HIST_STACKTRACE_SKIP 5
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 7ba371da0926..b2b726bea1f9 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -67,7 +67,7 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent)
int len = *(u32 *)data >> 16;
if (!len)
- trace_seq_puts(s, "(fault)");
+ trace_seq_puts(s, FAULT_STRING);
else
trace_seq_printf(s, "\"%s\"",
(const char *)get_loc_data(data, ent));
diff --git a/kernel/trace/trace_probe_kernel.h b/kernel/trace/trace_probe_kernel.h
index c4e1d4c03a85..bb723eefd7b7 100644
--- a/kernel/trace/trace_probe_kernel.h
+++ b/kernel/trace/trace_probe_kernel.h
@@ -2,8 +2,6 @@
#ifndef __TRACE_PROBE_KERNEL_H_
#define __TRACE_PROBE_KERNEL_H_
-#define FAULT_STRING "(fault)"
-
/*
* This depends on trace_probe.h, but can not include it due to
* the way trace_probe_tmpl.h is used by trace_kprobe.c and trace_eprobe.c.
@@ -15,16 +13,8 @@ static nokprobe_inline int
fetch_store_strlen_user(unsigned long addr)
{
const void __user *uaddr = (__force const void __user *)addr;
- int ret;
- ret = strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
- /*
- * strnlen_user_nofault returns zero on fault, insert the
- * FAULT_STRING when that occurs.
- */
- if (ret <= 0)
- return strlen(FAULT_STRING) + 1;
- return ret;
+ return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
}
/* Return the length of string -- including null terminal byte */
@@ -44,18 +34,14 @@ fetch_store_strlen(unsigned long addr)
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
- /* For faults, return enough to hold the FAULT_STRING */
- return (ret < 0) ? strlen(FAULT_STRING) + 1 : len;
+ return (ret < 0) ? ret : len;
}
-static nokprobe_inline void set_data_loc(int ret, void *dest, void *__dest, void *base, int len)
+static nokprobe_inline void set_data_loc(int ret, void *dest, void *__dest, void *base)
{
- if (ret >= 0) {
- *(u32 *)dest = make_data_loc(ret, __dest - base);
- } else {
- strscpy(__dest, FAULT_STRING, len);
- ret = strlen(__dest) + 1;
- }
+ if (ret < 0)
+ ret = 0;
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
}
/*
@@ -76,7 +62,7 @@ fetch_store_string_user(unsigned long addr, void *dest, void *base)
__dest = get_loc_data(dest, base);
ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
- set_data_loc(ret, dest, __dest, base, maxlen);
+ set_data_loc(ret, dest, __dest, base);
return ret;
}
@@ -107,7 +93,7 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
* probing.
*/
ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
- set_data_loc(ret, dest, __dest, base, maxlen);
+ set_data_loc(ret, dest, __dest, base);
return ret;
}
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 00707630788d..3935b347f874 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -156,11 +156,11 @@ stage3:
code++;
goto array;
case FETCH_OP_ST_USTRING:
- ret += fetch_store_strlen_user(val + code->offset);
+ ret = fetch_store_strlen_user(val + code->offset);
code++;
goto array;
case FETCH_OP_ST_SYMSTR:
- ret += fetch_store_symstrlen(val + code->offset);
+ ret = fetch_store_symstrlen(val + code->offset);
code++;
goto array;
default:
@@ -204,6 +204,8 @@ stage3:
array:
/* the last stage: Loop on array */
if (code->op == FETCH_OP_LP_ARRAY) {
+ if (ret < 0)
+ ret = 0;
total += ret;
if (++i < code->param) {
code = s3;
@@ -265,9 +267,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec,
if (unlikely(arg->dynamic))
*dl = make_data_loc(maxlen, dyndata - base);
ret = process_fetch_insn(arg->code, rec, dl, base);
- if (unlikely(ret < 0 && arg->dynamic)) {
- *dl = make_data_loc(0, dyndata - base);
- } else {
+ if (arg->dynamic && likely(ret > 0)) {
dyndata += ret;
maxlen -= ret;
}
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index eaa800530e92..555c223c3232 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -170,7 +170,8 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
*/
ret++;
*(u32 *)dest = make_data_loc(ret, (void *)dst - base);
- }
+ } else
+ *(u32 *)dest = make_data_loc(0, (void *)dst - base);
return ret;
}