summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorSunil Khatri <sunil.khatri@amd.com>2026-03-12 13:55:32 +0530
committerAlex Deucher <alexander.deucher@amd.com>2026-03-17 10:42:39 -0400
commitf802f7b0bc0917023f4b5938246fd7abf23fa5e3 (patch)
tree8811c173dbc49aa412b439d0cb0b4b7176f8d403 /drivers/gpu
parent7a14a4e9b3fda05b907d0445a3be9e7c0e887f4e (diff)
downloadlinux-f802f7b0bc0917023f4b5938246fd7abf23fa5e3.tar.gz
linux-f802f7b0bc0917023f4b5938246fd7abf23fa5e3.zip
drm/amdgpu/userq: unlock cancel_delayed_work_sync for hang_detect_work
cancel_delayed_work_sync() for the hang_detect_work work item should not be called with the mutex held, since amdgpu_userq_hang_detect_work() also needs the same mutex, and when the two run together it can deadlock. We do not need to hold the mutex for cancel_delayed_work_sync(&queue->hang_detect_work). With this in place, if the cancel and the worker thread run at the same time they will not deadlock. If, due to any failure, there is a hang detect and reset, then there is a deadlock scenario between the cancel and the running main thread. [ 243.118276] task:kworker/9:0 state:D stack:0 pid:73 tgid:73 ppid:2 task_flags:0x4208060 flags:0x00080000 [ 243.118283] Workqueue: events amdgpu_userq_hang_detect_work [amdgpu] [ 243.118636] Call Trace: [ 243.118639] <TASK> [ 243.118644] __schedule+0x581/0x1810 [ 243.118649] ? srso_return_thunk+0x5/0x5f [ 243.118656] ? srso_return_thunk+0x5/0x5f [ 243.118659] ? wake_up_process+0x15/0x20 [ 243.118665] schedule+0x64/0xe0 [ 243.118668] schedule_preempt_disabled+0x15/0x30 [ 243.118671] __mutex_lock+0x346/0x950 [ 243.118677] __mutex_lock_slowpath+0x13/0x20 [ 243.118681] mutex_lock+0x2c/0x40 [ 243.118684] amdgpu_userq_hang_detect_work+0x63/0x90 [amdgpu] [ 243.118888] process_scheduled_works+0x1f0/0x450 [ 243.118894] worker_thread+0x27f/0x370 [ 243.118899] kthread+0x1ed/0x210 [ 243.118903] ? __pfx_worker_thread+0x10/0x10 [ 243.118906] ? srso_return_thunk+0x5/0x5f [ 243.118909] ? __pfx_kthread+0x10/0x10 [ 243.118913] ret_from_fork+0x10f/0x1b0 [ 243.118916] ? __pfx_kthread+0x10/0x10 [ 243.118920] ret_from_fork_asm+0x1a/0x30 Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c11
1 files changed, 6 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 5619233e1b68..3cc6e8da48a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -623,13 +623,14 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
int r = 0;
cancel_delayed_work_sync(&uq_mgr->resume_work);
+
+ /* Cancel any pending hang detection work and cleanup */
+ cancel_delayed_work_sync(&queue->hang_detect_work);
+
mutex_lock(&uq_mgr->userq_mutex);
+ queue->hang_detect_fence = NULL;
amdgpu_userq_wait_for_last_fence(queue);
- /* Cancel any pending hang detection work and cleanup */
- if (queue->hang_detect_fence) {
- cancel_delayed_work_sync(&queue->hang_detect_work);
- queue->hang_detect_fence = NULL;
- }
+
r = amdgpu_bo_reserve(queue->db_obj.obj, true);
if (!r) {
amdgpu_bo_unpin(queue->db_obj.obj);