aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2025-09-08 18:00:07 -0700
committerVlastimil Babka <vbabka@suse.cz>2025-09-29 09:42:36 +0200
commitaf92793e52c3a99b828ed4bdd277fd3e11c18d08 (patch)
tree73e589f9851693b44c2dfe026a2893e2dcd8452d /include
parentslab: Reuse first bit for OBJEXTS_ALLOC_FAIL (diff)
downloadlinux-af92793e52c3a99b828ed4bdd277fd3e11c18d08.tar.gz
linux-af92793e52c3a99b828ed4bdd277fd3e11c18d08.zip
slab: Introduce kmalloc_nolock() and kfree_nolock().
kmalloc_nolock() relies on ability of local_trylock_t to detect the situation when per-cpu kmem_cache is locked. In !PREEMPT_RT local_(try)lock_irqsave(&s->cpu_slab->lock, flags) disables IRQs and marks s->cpu_slab->lock as acquired. local_lock_is_locked(&s->cpu_slab->lock) returns true when slab is in the middle of manipulating per-cpu cache of that specific kmem_cache. kmalloc_nolock() can be called from any context and can re-enter into ___slab_alloc(): kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> NMI -> bpf -> kmalloc_nolock() -> ___slab_alloc(cache_B) or kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock() -> ___slab_alloc(cache_B) Hence the caller of ___slab_alloc() checks if &s->cpu_slab->lock can be acquired without a deadlock before invoking the function. If that specific per-cpu kmem_cache is busy the kmalloc_nolock() retries in a different kmalloc bucket. The second attempt will likely succeed, since this cpu locked different kmem_cache. Similarly, in PREEMPT_RT local_lock_is_locked() returns true when per-cpu rt_spin_lock is locked by current _task_. In this case re-entrance into the same kmalloc bucket is unsafe, and kmalloc_nolock() tries a different bucket that is most likely is not locked by the current task. Though it may be locked by a different task it's safe to rt_spin_lock() and sleep on it. Similar to alloc_pages_nolock() the kmalloc_nolock() returns NULL immediately if called from hard irq or NMI in PREEMPT_RT. kfree_nolock() defers freeing to irq_work when local_lock_is_locked() and (in_nmi() or in PREEMPT_RT). SLUB_TINY config doesn't use local_lock_is_locked() and relies on spin_trylock_irqsave(&n->list_lock) to allocate, while kfree_nolock() always defers to irq_work. Note, kfree_nolock() must be called _only_ for objects allocated with kmalloc_nolock(). Debug checks (like kmemleak and kfence) were skipped on allocation, hence obj = kmalloc(); kfree_nolock(obj); will miss kmemleak/kfence book keeping and will cause false positives. large_kmalloc is not supported by either kmalloc_nolock() or kfree_nolock(). Signed-off-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Harry Yoo <harry.yoo@oracle.com> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Diffstat (limited to 'include')
-rw-r--r--include/linux/kasan.h13
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/slab.h4
3 files changed, 14 insertions, 5 deletions
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 890011071f2b..acdc8cb0152e 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
}
bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
- bool still_accessible);
+ bool still_accessible, bool no_quarantine);
/**
* kasan_slab_free - Poison, initialize, and quarantine a slab object.
* @object: Object to be freed.
@@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
* @Return true if KASAN took ownership of the object; false otherwise.
*/
static __always_inline bool kasan_slab_free(struct kmem_cache *s,
- void *object, bool init,
- bool still_accessible)
+ void *object, bool init,
+ bool still_accessible,
+ bool no_quarantine)
{
if (kasan_enabled())
- return __kasan_slab_free(s, object, init, still_accessible);
+ return __kasan_slab_free(s, object, init, still_accessible,
+ no_quarantine);
return false;
}
@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
}
static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
- bool init, bool still_accessible)
+ bool init, bool still_accessible,
+ bool no_quarantine)
{
return false;
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d254c0b96d0d..82563236f35c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -358,6 +358,8 @@ enum objext_flags {
* MEMCG_DATA_OBJEXTS.
*/
OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
+ /* slabobj_ext vector allocated with kmalloc_nolock() */
+ OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
/* the next bit after the last actual flag */
__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
};
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 680193356ac7..561597dd2164 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__))
void kfree(const void *objp);
+void kfree_nolock(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
@@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
+void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
+#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+
#define kmem_buckets_alloc(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))