From f99bf205dab026ef434520198af2fcb7dae0efdb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 19 Nov 2015 11:56:22 +0100 Subject: bpf: add show_fdinfo handler for maps Add a handler for show_fdinfo() to be used by the anon-inodes backend for eBPF maps, and dump the map specification there. Not only useful for admins, but also it provides a minimal way to compare specs from ELF vs pinned object. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d3313d02a7e..6d1407bc1531 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -93,6 +93,23 @@ void bpf_map_put(struct bpf_map *map) } } +#ifdef CONFIG_PROC_FS +static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_map *map = filp->private_data; + + seq_printf(m, + "map_type:\t%u\n" + "key_size:\t%u\n" + "value_size:\t%u\n" + "max_entries:\t%u\n", + map->map_type, + map->key_size, + map->value_size, + map->max_entries); +} +#endif + static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; @@ -108,7 +125,10 @@ static int bpf_map_release(struct inode *inode, struct file *filp) } static const struct file_operations bpf_map_fops = { - .release = bpf_map_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_map_show_fdinfo, +#endif + .release = bpf_map_release, }; int bpf_map_new_fd(struct bpf_map *map) -- cgit v1.2.3 From bb35a6ef7da492e7df1fe8772716ff88c172b4cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Dec 2015 22:33:49 +0100 Subject: bpf, inode: allow for rename and link ops Add support for renaming and hard links to the fs. 
Most of this can be implemented by using simple library operations under the same constraints that we don't use a reserved name like elsewhere. Linking can be useful to share/manage things like maps across subsystem users. It works within the file system boundary, but is not allowed for directories. Symbolic links are explicitly not implemented here, as it can be better done already by doing bind mounts inside bpf fs to set up shared directories f.e. useful when using volumes in docker containers that map a private working directory into /sys/fs/bpf/ which contains itself a bind mounted path from the host's /sys/fs/bpf/ mount that is shared among multiple containers. For single maps instead of whole directory, hard links can be easily used to do the same. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/inode.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5a8a797d50b7..f2ece3c174a5 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -187,11 +187,31 @@ static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, } } +static int bpf_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + if (bpf_dname_reserved(new_dentry)) + return -EPERM; + + return simple_link(old_dentry, dir, new_dentry); +} + +static int bpf_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + if (bpf_dname_reserved(new_dentry)) + return -EPERM; + + return simple_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static const struct inode_operations bpf_dir_iops = { .lookup = simple_lookup, .mknod = bpf_mkobj, .mkdir = bpf_mkdir, .rmdir = simple_rmdir, + .rename = bpf_rename, + .link = bpf_link, .unlink = simple_unlink, }; -- cgit v1.2.3 From 8b614aebecdf2b1f72d51b1527f5a75d218b78e2 Mon Sep 17 00:00:00 2001 From: Daniel 
Borkmann Date: Thu, 17 Dec 2015 23:51:54 +0100 Subject: bpf: move clearing of A/X into classic to eBPF migration prologue Back in the days when eBPF (or back then "internal BPF" ;->) was not exposed to user space, and only the classic BPF programs internally translated into eBPF programs, we missed the fact that for classic BPF A and X needed to be cleared. It was fixed back then via 83d5b7ef99c9 ("net: filter: initialize A and X registers"), and thus classic BPF specifics were added to the eBPF interpreter core to work around it. This added some confusion for JIT developers later on that take the eBPF interpreter code as an example for deriving their JIT. F.e. in f75298f5c3fe ("s390/bpf: clear correct BPF accumulator register"), at least X could leak stack memory. Furthermore, since this is only needed for classic BPF translations and not for eBPF (verifier takes care that read access to regs cannot be done uninitialized), more complexity is added to JITs as they need to determine whether they deal with migrations or native eBPF where they can just omit clearing A/X in their prologue and thus reduce image size a bit, see f.e. cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs"). In other cases (x86, arm64), A and X are being cleared in the prologue also for eBPF case, which is unnecessary. Let's move this into the BPF migration in bpf_convert_filter() where it actually belongs as long as the number of eBPF JITs is still small. It can thus be done generically; allowing us to remove the quirk from __bpf_prog_run() and to slightly reduce JIT image size in case of eBPF, while reducing code duplication on this matter in current(/future) eBPF JITs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Michael Holzheu Tested-by: Michael Holzheu Cc: Zi Shen Lim Cc: Yang Shi Acked-by: Yang Shi Acked-by: Zi Shen Lim Signed-off-by: David S.
Miller --- arch/arm64/net/bpf_jit_comp.c | 6 ------ arch/s390/net/bpf_jit_comp.c | 13 ++----------- arch/x86/net/bpf_jit_comp.c | 14 +++++++++----- kernel/bpf/core.c | 4 ---- net/core/filter.c | 19 ++++++++++++++++--- 5 files changed, 27 insertions(+), 29 deletions(-) (limited to 'kernel/bpf') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b162ad70effc..7658612d915c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -152,8 +152,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 ra = bpf2a64[BPF_REG_A]; - const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; @@ -200,10 +198,6 @@ static void build_prologue(struct jit_ctx *ctx) /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); - - /* Clear registers A and X */ - emit_a64_mov_i64(ra, 0, ctx); - emit_a64_mov_i64(rx, 0, ctx); } static void build_epilogue(struct jit_ctx *ctx) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e536..3c0bfc1f2694 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! 
*/ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 75991979f667..c080e812ce85 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -193,7 +193,7 @@ struct jit_context { 32 /* space for rbx, r13, r14, r15 */ + \ 8 /* space for skb_copy_bits() buffer */) -#define PROLOGUE_SIZE 51 +#define PROLOGUE_SIZE 48 /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program @@ -229,11 +229,15 @@ static void emit_prologue(u8 **pprog) /* mov qword ptr [rbp-X],r15 */ EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24); - /* clear A and X registers */ - EMIT2(0x31, 0xc0); /* xor eax, eax */ - EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls + * we need to reset the counter to 0. It's done in two instructions, + * resetting rax register to 0 (xor on eax gets 0 extended), and + * moving it to the counter location. 
+ */ - /* clear tail_cnt: mov qword ptr [rbp-X], rax */ + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp-X], rax */ EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 334b1bdd572c..972d9a8e4ac4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -306,10 +306,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; ARG1 = (u64) (unsigned long) ctx; - /* Registers used in classic BPF programs need to be reset first. */ - regs[BPF_REG_A] = 0; - regs[BPF_REG_X] = 0; - select_insn: goto *jumptable[insn->code]; diff --git a/net/core/filter.c b/net/core/filter.c index 34bf6fc77c1d..b513eb871839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -381,9 +381,22 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) - *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); - new_insn++; + /* Classic BPF related prologue emission. */ + if (new_insn) { + /* Classic BPF expects A and X to be reset first. These need + * to be guaranteed to be the first two instructions. + */ + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); + + /* All programs must keep CTX in callee saved BPF_REG_CTX. + * In eBPF case it's done by the compiler, here we need to + * do this ourself. Initial CTX is present in BPF_REG_ARG1. + */ + *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); + } else { + new_insn += 3; + } for (i = 0; i < len; fp++, i++) { struct bpf_insn tmp_insns[6] = { }; -- cgit v1.2.3 From 6591f1e6662dd595effb52a54e42a6d2d2b03e51 Mon Sep 17 00:00:00 2001 From: "tom.leiming@gmail.com" Date: Tue, 29 Dec 2015 22:40:25 +0800 Subject: bpf: hash: use atomic count Preparing for removing global per-hashtable lock, so the counter needs to be defined as atomic_t first.
Acked-by: Daniel Borkmann Signed-off-by: Ming Lei Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 34777b3746fa..2615388009a4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -18,7 +18,7 @@ struct bpf_htab { struct bpf_map map; struct hlist_head *buckets; raw_spinlock_t lock; - u32 count; /* number of elements in this hashtable */ + atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ }; @@ -106,7 +106,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) INIT_HLIST_HEAD(&htab->buckets[i]); raw_spin_lock_init(&htab->lock); - htab->count = 0; + atomic_set(&htab->count, 0); return &htab->map; @@ -256,7 +256,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, l_old = lookup_elem_raw(head, l_new->hash, key, key_size); - if (!l_old && unlikely(htab->count >= map->max_entries)) { + if (!l_old && unlikely(atomic_read(&htab->count) >= map->max_entries)) { /* if elem with this 'key' doesn't exist and we've reached * max_entries limit, fail insertion of new elem */ @@ -284,7 +284,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_del_rcu(&l_old->hash_node); kfree_rcu(l_old, rcu); } else { - htab->count++; + atomic_inc(&htab->count); } raw_spin_unlock_irqrestore(&htab->lock, flags); @@ -319,7 +319,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) if (l) { hlist_del_rcu(&l->hash_node); - htab->count--; + atomic_dec(&htab->count); kfree_rcu(l, rcu); ret = 0; } @@ -339,7 +339,7 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); - htab->count--; + atomic_dec(&htab->count); kfree(l); } } -- cgit v1.2.3 From 
45d8390c56bd2851097736c1c20ad958880168df Mon Sep 17 00:00:00 2001 From: "tom.leiming@gmail.com" Date: Tue, 29 Dec 2015 22:40:26 +0800 Subject: bpf: hash: move select_bucket() out of htab's spinlock The spinlock is just used for protecting the per-bucket hlist, so it isn't needed for selecting bucket. Acked-by: Daniel Borkmann Signed-off-by: Ming Lei Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2615388009a4..d857fcb3607b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -248,12 +248,11 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); l_new->hash = htab_map_hash(l_new->key, key_size); + head = select_bucket(htab, l_new->hash); /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&htab->lock, flags); - head = select_bucket(htab, l_new->hash); - l_old = lookup_elem_raw(head, l_new->hash, key, key_size); if (!l_old && unlikely(atomic_read(&htab->count) >= map->max_entries)) { @@ -310,11 +309,10 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; hash = htab_map_hash(key, key_size); + head = select_bucket(htab, hash); raw_spin_lock_irqsave(&htab->lock, flags); - head = select_bucket(htab, hash); - l = lookup_elem_raw(head, hash, key, key_size); if (l) { -- cgit v1.2.3 From 688ecfe60220516e8b6707c832ec02e92522dd85 Mon Sep 17 00:00:00 2001 From: "tom.leiming@gmail.com" Date: Tue, 29 Dec 2015 22:40:27 +0800 Subject: bpf: hash: use per-bucket spinlock Both htab_map_update_elem() and htab_map_delete_elem() can be called from eBPF program, and they may be in kernel hot path, so it isn't efficient to use a per-hashtable lock in these two helpers. The per-hashtable spinlock is used for protecting bucket's hlist, and per-bucket lock is just enough.
This patch converts the per-hashtable lock into per-bucket spinlock, so that contention can be decreased a lot. Signed-off-by: Ming Lei Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d857fcb3607b..c5b30fd8a315 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -14,10 +14,14 @@ #include #include +struct bucket { + struct hlist_head head; + raw_spinlock_t lock; +}; + struct bpf_htab { struct bpf_map map; - struct hlist_head *buckets; - raw_spinlock_t lock; + struct bucket *buckets; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ @@ -79,33 +83,34 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) /* prevent zero size kmalloc and check for u32 overflow */ if (htab->n_buckets == 0 || - htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) + htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; - if ((u64) htab->n_buckets * sizeof(struct hlist_head) + + if ((u64) htab->n_buckets * sizeof(struct bucket) + (u64) htab->elem_size * htab->map.max_entries >= U32_MAX - PAGE_SIZE) /* make sure page count doesn't overflow */ goto free_htab; - htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + + htab->map.pages = round_up(htab->n_buckets * sizeof(struct bucket) + htab->elem_size * htab->map.max_entries, PAGE_SIZE) >> PAGE_SHIFT; err = -ENOMEM; - htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), + htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket), GFP_USER | __GFP_NOWARN); if (!htab->buckets) { - htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); + htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket)); if 
(!htab->buckets) goto free_htab; } - for (i = 0; i < htab->n_buckets; i++) - INIT_HLIST_HEAD(&htab->buckets[i]); + for (i = 0; i < htab->n_buckets; i++) { + INIT_HLIST_HEAD(&htab->buckets[i].head); + raw_spin_lock_init(&htab->buckets[i].lock); + } - raw_spin_lock_init(&htab->lock); atomic_set(&htab->count, 0); return &htab->map; @@ -120,11 +125,16 @@ static inline u32 htab_map_hash(const void *key, u32 key_len) return jhash(key, key_len, 0); } -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) { return &htab->buckets[hash & (htab->n_buckets - 1)]; } +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +{ + return &__select_bucket(htab, hash)->head; +} + static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, u32 key_size) { @@ -227,6 +237,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old; struct hlist_head *head; + struct bucket *b; unsigned long flags; u32 key_size; int ret; @@ -248,10 +259,11 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); l_new->hash = htab_map_hash(l_new->key, key_size); - head = select_bucket(htab, l_new->hash); + b = __select_bucket(htab, l_new->hash); + head = &b->head; /* bpf_map_update_elem() can be called in_irq() */ - raw_spin_lock_irqsave(&htab->lock, flags); + raw_spin_lock_irqsave(&b->lock, flags); l_old = lookup_elem_raw(head, l_new->hash, key, key_size); @@ -285,11 +297,11 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, } else { atomic_inc(&htab->count); } - raw_spin_unlock_irqrestore(&htab->lock, flags); + raw_spin_unlock_irqrestore(&b->lock, flags); return 0; err: - raw_spin_unlock_irqrestore(&htab->lock, 
flags); + raw_spin_unlock_irqrestore(&b->lock, flags); kfree(l_new); return ret; } @@ -299,6 +311,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_head *head; + struct bucket *b; struct htab_elem *l; unsigned long flags; u32 hash, key_size; @@ -309,9 +322,10 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; hash = htab_map_hash(key, key_size); - head = select_bucket(htab, hash); + b = __select_bucket(htab, hash); + head = &b->head; - raw_spin_lock_irqsave(&htab->lock, flags); + raw_spin_lock_irqsave(&b->lock, flags); l = lookup_elem_raw(head, hash, key, key_size); @@ -322,7 +336,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) ret = 0; } - raw_spin_unlock_irqrestore(&htab->lock, flags); + raw_spin_unlock_irqrestore(&b->lock, flags); return ret; } -- cgit v1.2.3 From 229394e8e62a4191d592842cf67e80c62a492937 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 12 Jan 2016 20:17:08 +0100 Subject: net: bpf: reject invalid shifts On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a constant shift that can't be encoded in the immediate field of the UBFM/SBFM instructions is passed to the JIT. Since these shift amounts, which are negative or >= regsize, are invalid, reject them in the eBPF verifier and the classic BPF filter checker, for all architectures. Signed-off-by: Rabin Vincent Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- kernel/bpf/verifier.c | 10 ++++++++++ net/core/filter.c | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a7945d10b378..d1d3e8f57de9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1121,6 +1121,16 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if ((opcode == BPF_LSH || opcode == BPF_RSH || + opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; + + if (insn->imm < 0 || insn->imm >= size) { + verbose("invalid shift %d\n", insn->imm); + return -EINVAL; + } + } + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && diff --git a/net/core/filter.c b/net/core/filter.c index 77cdfb455e7f..94d26201080d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -785,6 +785,11 @@ static int bpf_check_classic(const struct sock_filter *filter, if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: -- cgit v1.2.3