From 3051247e4faa32a3d90c762a243c2c62dde310db Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 11 Jul 2025 16:30:09 +0800
Subject: block: fix kobject leak in blk_unregister_queue

The kobject for the queue, `disk->queue_kobj`, is initialized with a
reference count of 1 via `kobject_init()` in `blk_register_queue()`.
While `kobject_del()` is called during the unregister path to remove
the kobject from sysfs, the initial reference is never released.

Add a call to `kobject_put()` in `blk_unregister_queue()` to properly
decrement the reference count and fix the leak.

Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250711083009.2574432-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b2b9b89d6967..c611444480b3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -960,4 +960,5 @@ void blk_unregister_queue(struct gendisk *disk)
 		elevator_set_none(q);
 
 	blk_debugfs_remove(disk);
+	kobject_put(&disk->queue_kobj);
 }
-- 
cgit v1.2.3


From 80d7762e0a42307ee31b21f090e21349b98c14f6 Mon Sep 17 00:00:00 2001
From: Zheng Qixing <zhengqixing@huawei.com>
Date: Tue, 1 Jul 2025 15:17:17 +0800
Subject: nvme: fix inconsistent RCU list manipulation in
 nvme_ns_add_to_ctrl_list()

When inserting a namespace into the controller's namespace list, the
function uses list_add_rcu() when the namespace is inserted in the middle
of the list, but falls back to a regular list_add() when adding at the
head of the list.

This inconsistency could lead to race conditions during concurrent
access, as users might observe a partially updated list. Fix this by
consistently using list_add_rcu() in both code paths to ensure proper
RCU protection throughout the entire function.

Fixes: be647e2c76b2 ("nvme: use srcu for iterating namespace list")
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7493e5aa984c..5eb45d26c553 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4077,7 +4077,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
 			return;
 		}
 	}
-	list_add(&ns->list, &ns->ctrl->namespaces);
+	list_add_rcu(&ns->list, &ns->ctrl->namespaces);
 }
 
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
-- 
cgit v1.2.3


From dd8e34afd6709cb2f9c0e63340f567e6c066ed8e Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 30 Jun 2025 16:21:53 +0000
Subject: nvme: fix endianness of command word prints in
 nvme_log_err_passthru()

The command word members of struct nvme_common_command are __le32 type,
so use helper le32_to_cpu() to read them properly.

Fixes: 9f079dda1433 ("nvme: allow passthru cmd error logging")
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Alan Adamson <alan.adamson@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5eb45d26c553..1d2240494f3d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -381,12 +381,12 @@ static void nvme_log_err_passthru(struct request *req)
 		nr->status & NVME_SC_MASK,	/* Status Code */
 		nr->status & NVME_STATUS_MORE ? "MORE " : "",
 		nr->status & NVME_STATUS_DNR  ? "DNR "  : "",
-		nr->cmd->common.cdw10,
-		nr->cmd->common.cdw11,
-		nr->cmd->common.cdw12,
-		nr->cmd->common.cdw13,
-		nr->cmd->common.cdw14,
-		nr->cmd->common.cdw15);
+		le32_to_cpu(nr->cmd->common.cdw10),
+		le32_to_cpu(nr->cmd->common.cdw11),
+		le32_to_cpu(nr->cmd->common.cdw12),
+		le32_to_cpu(nr->cmd->common.cdw13),
+		le32_to_cpu(nr->cmd->common.cdw14),
+		le32_to_cpu(nr->cmd->common.cdw15));
 }
 
 enum nvme_disposition {
-- 
cgit v1.2.3


From 1fc09f2961f5c6d8bb53bc989f17b12fdc6bc93d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Jul 2025 11:54:08 +0200
Subject: nvme: revert the cross-controller atomic write size validation

This was originally added by commit 8695f060a029 ("nvme: all namespaces
in a subsystem must adhere to a common atomic write size") to check
the all controllers in a subsystem report the same atomic write size,
but the check wasn't quite correct and caused problems for devices
with multiple namespaces that report different LBA sizes.  Commit
f46d273449ba ("nvme: fix atomic write size validation") tried to fix
this, but then caused problems for namespace rediscovery after a
format with an LBA size change that changes the AWUPF value.

This drops the validation and essentially reverts those two commits while
keeping the cleanup that went in between the two.  We'll need to figure
out how to properly check for the mouse trap that nvme left us, but for
now revert the check to keep devices working for users who couldn't care
less about the atomic write feature.

Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
Fixes: f46d273449ba ("nvme: fix atomic write size validation")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alan Adamson <alan.adamson@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Tested-by: Alan Adamson <alan.adamson@oracle.com>
---
 drivers/nvme/host/core.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1d2240494f3d..fdd2334dacca 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3537,15 +3537,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		if (ret)
 			goto out_free;
 	}
-
-	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
-		dev_err_ratelimited(ctrl->device,
-			"inconsistent AWUPF, controller not added (%u/%u).\n",
-			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
-		ret = -EINVAL;
-		goto out_free;
-	}
-
 	memcpy(ctrl->subsys->firmware_rev, id->fr,
 	       sizeof(ctrl->subsys->firmware_rev));
 
-- 
cgit v1.2.3


From 71257925e83eae1cb6913d65ca71927d2220e6d1 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 15 Jul 2025 09:28:12 +0800
Subject: nvme: fix misaccounting of nvme-mpath inflight I/O

Procedures for nvme-mpath IO accounting:

 1) initialize nvme_request and clear flags;
 2) set NVME_MPATH_IO_STATS and increase inflight counter when IO
    started;
 3) check NVME_MPATH_IO_STATS and decrease inflight counter when IO is
    done;

However, for the case nvme_fail_nonready_command(), both step 1) and 2)
are skipped, and if old nvme_request set NVME_MPATH_IO_STATS and then
request is reused, step 3) will still be executed, causing inflight I/O
counter to be negative.

Fix the problem by clearing nvme_request in nvme_fail_nonready_command().

Fixes: ea5e5f42cd2c ("nvme-fabrics: avoid double completions in nvmf_fail_nonready_command")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs_+dauobyYyP805t33WMJVzOWj=7+51p4_j9rA63D9sog@mail.gmail.com/
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fdd2334dacca..895fb163d48e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -764,6 +764,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
 	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
+
+	if (!(rq->rq_flags & RQF_DONTPREP))
+		nvme_clear_nvme_request(rq);
+
 	return nvme_host_path_error(rq);
 }
 EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
-- 
cgit v1.2.3


From 0523c6cc87e558c50ff4489c87c54c55068b1169 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Fri, 4 Jul 2025 16:44:54 +0200
Subject: nvmet-tcp: fix callback lock for TLS handshake

When restoring the default socket callbacks during a TLS handshake, we
need to acquire a write lock on sk_callback_lock.  Previously, a read
lock was used, which is insufficient for modifying sk_user_data and
sk_data_ready.

Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall")
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index c6603bd9c95e..c58d2caef70a 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1928,10 +1928,10 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 		struct sock *sk = queue->sock->sk;
 
 		/* Restore the default callbacks before starting upcall */
-		read_lock_bh(&sk->sk_callback_lock);
+		write_lock_bh(&sk->sk_callback_lock);
 		sk->sk_user_data = NULL;
 		sk->sk_data_ready = port->data_ready;
-		read_unlock_bh(&sk->sk_callback_lock);
+		write_unlock_bh(&sk->sk_callback_lock);
 		if (!nvmet_tcp_try_peek_pdu(queue)) {
 			if (!nvmet_tcp_tls_handshake(queue))
 				return;
-- 
cgit v1.2.3


From c4706c5058a7bd7d7c20f3b24a8f523ecad44e83 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Jul 2025 19:48:08 +0800
Subject: loop: use kiocb helpers to fix lockdep warning

The lockdep tool can report a circular lock dependency warning in the loop
driver's AIO read/write path:

```
[ 6540.587728] kworker/u96:5/72779 is trying to acquire lock:
[ 6540.593856] ff110001b5968440 (sb_writers#9){.+.+}-{0:0}, at: loop_process_work+0x11a/0xf70 [loop]
[ 6540.603786]
[ 6540.603786] but task is already holding lock:
[ 6540.610291] ff110001b5968440 (sb_writers#9){.+.+}-{0:0}, at: loop_process_work+0x11a/0xf70 [loop]
[ 6540.620210]
[ 6540.620210] other info that might help us debug this:
[ 6540.627499]  Possible unsafe locking scenario:
[ 6540.627499]
[ 6540.634110]        CPU0
[ 6540.636841]        ----
[ 6540.639574]   lock(sb_writers#9);
[ 6540.643281]   lock(sb_writers#9);
[ 6540.646988]
[ 6540.646988]  *** DEADLOCK ***
```

This patch fixes the issue by using the AIO-specific helpers
`kiocb_start_write()` and `kiocb_end_write()`. These functions are
designed to be used with a `kiocb` and manage write sequencing
correctly for asynchronous I/O without introducing the problematic
lock dependency.

The `kiocb` is already part of the `loop_cmd` struct, so this change
also simplifies the completion function `lo_rw_aio_do_completion()` by
using the `iocb` from the `cmd` struct directly, instead of retrieving
the loop device from the request queue.

Fixes: 39d86db34e41 ("loop: add file_start_write() and file_end_write()")
Cc: Changhui Zhong <czhong@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250716114808.3159657-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 500840e4a74e..8d994cae3b83 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -308,14 +308,13 @@ end_io:
 static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
 {
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
-	struct loop_device *lo = rq->q->queuedata;
 
 	if (!atomic_dec_and_test(&cmd->ref))
 		return;
 	kfree(cmd->bvec);
 	cmd->bvec = NULL;
 	if (req_op(rq) == REQ_OP_WRITE)
-		file_end_write(lo->lo_backing_file);
+		kiocb_end_write(&cmd->iocb);
 	if (likely(!blk_should_fake_timeout(rq->q)))
 		blk_mq_complete_request(rq);
 }
@@ -391,7 +390,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	}
 
 	if (rw == ITER_SOURCE) {
-		file_start_write(lo->lo_backing_file);
+		kiocb_start_write(&cmd->iocb);
 		ret = file->f_op->write_iter(&cmd->iocb, &iter);
 	} else
 		ret = file->f_op->read_iter(&cmd->iocb, &iter);
-- 
cgit v1.2.3