From c5b6ababd21ab6bb251e5a2b4db110e76fb00a26 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:02 -0400 Subject: locking/mutex: implement mutex_trylock_nested Despite the fact that several lockdep-related checks are skipped when calling trylock* versions of the locking primitives, for example mutex_trylock, each time the mutex is acquired, a held_lock is still placed onto the lockdep stack by __lock_acquire() which is called regardless of whether the trylock* or regular locking API was used. This means that if the caller successfully acquires more than MAX_LOCK_DEPTH locks of the same class, even when using mutex_trylock, lockdep will still complain that the maximum depth of the held lock stack has been reached and disable itself. For example, the following error currently occurs in the ARM version of KVM, once the code tries to lock all vCPUs of a VM configured with more than MAX_LOCK_DEPTH vCPUs, a situation that can easily happen on modern systems, where having more than 48 CPUs is common, and it's also common to run VMs that have vCPU counts approaching that number: [ 328.171264] BUG: MAX_LOCK_DEPTH too low! [ 328.175227] turning off the locking correctness validator. [ 328.180726] Please attach the output of /proc/lock_stat to the bug report [ 328.187531] depth: 48 max: 48! [ 328.190678] 48 locks held by qemu-kvm/11664: [ 328.194957] #0: ffff800086de5ba0 (&kvm->lock){+.+.}-{3:3}, at: kvm_ioctl_create_device+0x174/0x5b0 [ 328.204048] #1: ffff0800e78800b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.212521] #2: ffff07ffeee51e98 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.220991] #3: ffff0800dc7d80b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.229463] #4: ffff07ffe0c980b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.237934] #5: ffff0800a3883c78 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.246405] #6: ffff07fffbe480b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 Luckily, in all instances that require locking all vCPUs, the 'kvm->lock' is taken a priori, and that fact makes it possible to use the little known feature of lockdep, called a 'nest_lock', to avoid this warning and subsequent lockdep self-disablement. The action of 'nested lock' being provided to lockdep's lock_acquire(), causes the lockdep to detect that the top of the held lock stack contains a lock of the same class and then increment its reference counter instead of pushing a new held_lock item onto that stack. See __lock_acquire for more information. Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- kernel/locking/mutex.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 555e2b3a665a..c75a838d3bae 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1062,6 +1062,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, #endif +#ifndef CONFIG_DEBUG_LOCK_ALLOC /** * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired @@ -1077,18 +1078,25 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, * mutex must be released by the same task that acquired it. */ int __sched mutex_trylock(struct mutex *lock) +{ + MUTEX_WARN_ON(lock->magic != lock); + return __mutex_trylock(lock); +} +EXPORT_SYMBOL(mutex_trylock); +#else +int __sched _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) { bool locked; MUTEX_WARN_ON(lock->magic != lock); - locked = __mutex_trylock(lock); if (locked) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_); return locked; } -EXPORT_SYMBOL(mutex_trylock); +EXPORT_SYMBOL(_mutex_trylock_nest_lock); +#endif #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched -- cgit v1.2.3 From fb49f07ba1d9d8c9bd8854878ae6b5b21ff9ac45 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:03 -0400 Subject: locking/mutex: implement mutex_lock_killable_nest_lock KVM's SEV intra-host migration code needs to lock all vCPUs of the source and the target VM, before it proceeds with the migration. The number of vCPUs that belong to each VM is not bounded by anything except a self-imposed KVM limit of CONFIG_KVM_MAX_NR_VCPUS vCPUs which is significantly larger than the depth of lockdep's lock stack. Luckily, the locks in both of the cases mentioned above, are held under the 'kvm->lock' of each VM, which means that we can use the little known lockdep feature called a "nest_lock" to support this use case in a cleaner way, compared to the way it's currently done. Implement and expose 'mutex_lock_killable_nest_lock' for this purpose. Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mutex.h | 17 +++++++++++++---- kernel/locking/mutex.c | 7 ++++--- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'kernel/locking') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index da4518cfd59c..a039fa8c1780 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,16 +156,15 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); - extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); -extern int __must_check mutex_lock_killable_nested(struct mutex *lock, - unsigned int subclass); +extern int __must_check _mutex_lock_killable(struct mutex *lock, + unsigned int subclass, struct lockdep_map *nest_lock); extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) -#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_killable(lock) _mutex_lock_killable(lock, 0, NULL) #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ @@ -174,6 +173,15 @@ do { \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) +#define mutex_lock_killable_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_lock_killable(lock, 0, &(nest_lock)->dep_map) \ +) + +#define mutex_lock_killable_nested(lock, subclass) \ + _mutex_lock_killable(lock, subclass, NULL) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -183,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_killable_nest_lock(lock, nest_lock) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c75a838d3bae..234923121ff0 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -808,11 +808,12 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched -mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) +_mutex_lock_killable(struct mutex *lock, unsigned int subclass, + struct lockdep_map *nest) { - return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); + return __mutex_lock(lock, TASK_KILLABLE, subclass, nest, _RET_IP_); } -EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); +EXPORT_SYMBOL_GPL(_mutex_lock_killable); int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) -- cgit v1.2.3