From 3051247e4faa32a3d90c762a243c2c62dde310db Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Jul 2025 16:30:09 +0800 Subject: block: fix kobject leak in blk_unregister_queue The kobject for the queue, `disk->queue_kobj`, is initialized with a reference count of 1 via `kobject_init()` in `blk_register_queue()`. While `kobject_del()` is called during the unregister path to remove the kobject from sysfs, the initial reference is never released. Add a call to `kobject_put()` in `blk_unregister_queue()` to properly decrement the reference count and fix the leak. Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs") Cc: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250711083009.2574432-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b2b9b89d6967..c611444480b3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -960,4 +960,5 @@ void blk_unregister_queue(struct gendisk *disk) elevator_set_none(q); blk_debugfs_remove(disk); + kobject_put(&disk->queue_kobj); } -- cgit v1.2.3 From 80d7762e0a42307ee31b21f090e21349b98c14f6 Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Tue, 1 Jul 2025 15:17:17 +0800 Subject: nvme: fix inconsistent RCU list manipulation in nvme_ns_add_to_ctrl_list() When inserting a namespace into the controller's namespace list, the function uses list_add_rcu() when the namespace is inserted in the middle of the list, but falls back to a regular list_add() when adding at the head of the list. This inconsistency could lead to race conditions during concurrent access, as users might observe a partially updated list. Fix this by consistently using list_add_rcu() in both code paths to ensure proper RCU protection throughout the entire function. Fixes: be647e2c76b2 ("nvme: use srcu for iterating namespace list") Signed-off-by: Zheng Qixing Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7493e5aa984c..5eb45d26c553 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4077,7 +4077,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) return; } } - list_add(&ns->list, &ns->ctrl->namespaces); + list_add_rcu(&ns->list, &ns->ctrl->namespaces); } static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) -- cgit v1.2.3 From dd8e34afd6709cb2f9c0e63340f567e6c066ed8e Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 30 Jun 2025 16:21:53 +0000 Subject: nvme: fix endianness of command word prints in nvme_log_err_passthru() The command word members of struct nvme_common_command are __le32 type, so use helper le32_to_cpu() to read them properly. Fixes: 9f079dda1433 ("nvme: allow passthru cmd error logging") Signed-off-by: John Garry Reviewed-by: Alan Adamson Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5eb45d26c553..1d2240494f3d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -381,12 +381,12 @@ static void nvme_log_err_passthru(struct request *req) nr->status & NVME_SC_MASK, /* Status Code */ nr->status & NVME_STATUS_MORE ? "MORE " : "", nr->status & NVME_STATUS_DNR ? "DNR " : "", - nr->cmd->common.cdw10, - nr->cmd->common.cdw11, - nr->cmd->common.cdw12, - nr->cmd->common.cdw13, - nr->cmd->common.cdw14, - nr->cmd->common.cdw15); + le32_to_cpu(nr->cmd->common.cdw10), + le32_to_cpu(nr->cmd->common.cdw11), + le32_to_cpu(nr->cmd->common.cdw12), + le32_to_cpu(nr->cmd->common.cdw13), + le32_to_cpu(nr->cmd->common.cdw14), + le32_to_cpu(nr->cmd->common.cdw15)); } enum nvme_disposition { -- cgit v1.2.3 From 1fc09f2961f5c6d8bb53bc989f17b12fdc6bc93d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Jul 2025 11:54:08 +0200 Subject: nvme: revert the cross-controller atomic write size validation This was originally added by commit 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size") to check the all controllers in a subsystem report the same atomic write size, but the check wasn't quite correct and caused problems for devices with multiple namespaces that report different LBA sizes. Commit f46d273449ba ("nvme: fix atomic write size validation") tried to fix this, but then caused problems for namespace rediscovery after a format with an LBA size change that changes the AWUPF value. This drops the validation and essentially reverts those two commits while keeping the cleanup that went in between the two. We'll need to figure out how to properly check for the mouse trap that nvme left us, but for now revert the check to keep devices working for users who couldn't care less about the atomic write feature. Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size") Fixes: f46d273449ba ("nvme: fix atomic write size validation") Signed-off-by: Christoph Hellwig Reviewed-by: Alan Adamson Reviewed-by: Keith Busch Reviewed-by: John Garry Reviewed-by: Chaitanya Kulkarni Tested-by: Alan Adamson --- drivers/nvme/host/core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1d2240494f3d..fdd2334dacca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3537,15 +3537,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } - - if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { - dev_err_ratelimited(ctrl->device, - "inconsistent AWUPF, controller not added (%u/%u).\n", - le16_to_cpu(id->awupf), ctrl->subsys->awupf); - ret = -EINVAL; - goto out_free; - } - memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); -- cgit v1.2.3 From 71257925e83eae1cb6913d65ca71927d2220e6d1 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 15 Jul 2025 09:28:12 +0800 Subject: nvme: fix misaccounting of nvme-mpath inflight I/O Procedures for nvme-mpath IO accounting: 1) initialize nvme_request and clear flags; 2) set NVME_MPATH_IO_STATS and increase inflight counter when IO started; 3) check NVME_MPATH_IO_STATS and decrease inflight counter when IO is done; However, for the case nvme_fail_nonready_command(), both step 1) and 2) are skipped, and if old nvme_request set NVME_MPATH_IO_STATS and then request is reused, step 3) will still be executed, causing inflight I/O counter to be negative. Fix the problem by clearing nvme_request in nvme_fail_nonready_command(). Fixes: ea5e5f42cd2c ("nvme-fabrics: avoid double completions in nvmf_fail_nonready_command") Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs_+dauobyYyP805t33WMJVzOWj=7+51p4_j9rA63D9sog@mail.gmail.com/ Signed-off-by: Yu Kuai Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fdd2334dacca..895fb163d48e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -764,6 +764,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; + + if (!(rq->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(rq); + return nvme_host_path_error(rq); } EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); -- cgit v1.2.3 From 0523c6cc87e558c50ff4489c87c54c55068b1169 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 4 Jul 2025 16:44:54 +0200 Subject: nvmet-tcp: fix callback lock for TLS handshake When restoring the default socket callbacks during a TLS handshake, we need to acquire a write lock on sk_callback_lock. Previously, a read lock was used, which is insufficient for modifying sk_user_data and sk_data_ready. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Signed-off-by: Maurizio Lombardi Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c6603bd9c95e..c58d2caef70a 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1928,10 +1928,10 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, struct sock *sk = queue->sock->sk; /* Restore the default callbacks before starting upcall */ - read_lock_bh(&sk->sk_callback_lock); + write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = NULL; sk->sk_data_ready = port->data_ready; - read_unlock_bh(&sk->sk_callback_lock); + write_unlock_bh(&sk->sk_callback_lock); if (!nvmet_tcp_try_peek_pdu(queue)) { if (!nvmet_tcp_tls_handshake(queue)) return; -- cgit v1.2.3 From c4706c5058a7bd7d7c20f3b24a8f523ecad44e83 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Jul 2025 19:48:08 +0800 Subject: loop: use kiocb helpers to fix lockdep warning The lockdep tool can report a circular lock dependency warning in the loop driver's AIO read/write path: ``` [ 6540.587728] kworker/u96:5/72779 is trying to acquire lock: [ 6540.593856] ff110001b5968440 (sb_writers#9){.+.+}-{0:0}, at: loop_process_work+0x11a/0xf70 [loop] [ 6540.603786] [ 6540.603786] but task is already holding lock: [ 6540.610291] ff110001b5968440 (sb_writers#9){.+.+}-{0:0}, at: loop_process_work+0x11a/0xf70 [loop] [ 6540.620210] [ 6540.620210] other info that might help us debug this: [ 6540.627499] Possible unsafe locking scenario: [ 6540.627499] [ 6540.634110] CPU0 [ 6540.636841] ---- [ 6540.639574] lock(sb_writers#9); [ 6540.643281] lock(sb_writers#9); [ 6540.646988] [ 6540.646988] *** DEADLOCK *** ``` This patch fixes the issue by using the AIO-specific helpers `kiocb_start_write()` and `kiocb_end_write()`. These functions are designed to be used with a `kiocb` and manage write sequencing correctly for asynchronous I/O without introducing the problematic lock dependency. The `kiocb` is already part of the `loop_cmd` struct, so this change also simplifies the completion function `lo_rw_aio_do_completion()` by using the `iocb` from the `cmd` struct directly, instead of retrieving the loop device from the request queue. Fixes: 39d86db34e41 ("loop: add file_start_write() and file_end_write()") Cc: Changhui Zhong Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250716114808.3159657-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 500840e4a74e..8d994cae3b83 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -308,14 +308,13 @@ end_io: static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); - struct loop_device *lo = rq->q->queuedata; if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; if (req_op(rq) == REQ_OP_WRITE) - file_end_write(lo->lo_backing_file); + kiocb_end_write(&cmd->iocb); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } @@ -391,7 +390,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, } if (rw == ITER_SOURCE) { - file_start_write(lo->lo_backing_file); + kiocb_start_write(&cmd->iocb); ret = file->f_op->write_iter(&cmd->iocb, &iter); } else ret = file->f_op->read_iter(&cmd->iocb, &iter); -- cgit v1.2.3