aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 18:08:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 18:08:55 -0700
commit9b960d8cd6f712cb2c03e2bdd4d5ca058238037f (patch)
treea1381af6c79626c0a28679f804477b43b7c91565 /drivers/block
parentMerge tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux (diff)
parentMerge tag 'nvme-6.15-2025-03-20' of git://git.infradead.org/nvme into for-6.1... (diff)
downloadlinux-9b960d8cd6f712cb2c03e2bdd4d5ca058238037f.tar.gz
linux-9b960d8cd6f712cb2c03e2bdd4d5ca058238037f.zip
Merge tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - Fixes for integrity handling - NVMe pull request via Keith: - Secure concatenation for TCP transport (Hannes) - Multipath sysfs visibility (Nilay) - Various cleanups (Qasim, Baruch, Wang, Chen, Mike, Damien, Li) - Correct use of 64-bit BARs for pci-epf target (Niklas) - Socket fix for selinux when used in containers (Peijie) - MD pull request via Yu: - fix recovery can preempt resync (Li Nan) - fix md-bitmap IO limit (Su Yue) - fix raid10 discard with REQ_NOWAIT (Xiao Ni) - fix raid1 memory leak (Zheng Qixing) - fix mddev uaf (Yu Kuai) - fix raid1,raid10 IO flags (Yu Kuai) - some refactor and cleanup (Yu Kuai) - Series cleaning up and fixing bugs in the bad block handling code - Improve support for write failure simulation in null_blk - Various lock ordering fixes - Fixes for locking for debugfs attributes - Various ublk related fixes and improvements - Cleanups for blk-rq-qos wait handling - blk-throttle fixes - Fixes for loop dio and sync handling - Fixes and cleanups for the auto-PI code - Block side support for hardware encryption keys in blk-crypto - Various cleanups and fixes * tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux: (105 commits) nvmet: replace max(a, min(b, c)) by clamp(val, lo, hi) nvme-tcp: fix selinux denied when calling sock_sendmsg nvmet: pci-epf: Always configure BAR0 as 64-bit nvmet: Remove duplicate uuid_copy nvme: zns: Simplify nvme_zone_parse_entry() nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls nvmet-fc: Remove unused functions nvme-pci: remove stale comment nvme-fc: Utilise min3() to simplify queue count calculation nvme-multipath: Add visibility for queue-depth io-policy nvme-multipath: Add visibility for numa io-policy nvme-multipath: Add visibility for round-robin io-policy nvmet: add tls_concat and tls_key debugfs entries nvmet-tcp: support secure channel concatenation nvmet: Add 'sq' argument to alloc_ctrl_args nvme-fabrics: reset admin connection for secure concatenation nvme-tcp: request secure channel concatenation nvme-keyring: add nvme_tls_psk_refresh() nvme: add nvme_auth_derive_tls_psk() nvme: add nvme_auth_generate_digest() ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/loop.c106
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/null_blk/main.c177
-rw-r--r--drivers/block/null_blk/null_blk.h6
-rw-r--r--drivers/block/null_blk/zoned.c20
-rw-r--r--drivers/block/rnbd/rnbd-clt.c2
-rw-r--r--drivers/block/sunvdc.c2
-rw-r--r--drivers/block/ublk_drv.c115
-rw-r--r--drivers/block/virtio_blk.c2
-rw-r--r--drivers/block/xen-blkfront.c2
10 files changed, 259 insertions, 175 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c05fe27a96b6..674527d770dc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -45,8 +45,6 @@ enum {
Lo_deleting,
};
-struct loop_func_table;
-
struct loop_device {
int lo_number;
loff_t lo_offset;
@@ -54,7 +52,8 @@ struct loop_device {
int lo_flags;
char lo_file_name[LO_NAME_SIZE];
- struct file * lo_backing_file;
+ struct file *lo_backing_file;
+ unsigned int lo_min_dio_size;
struct block_device *lo_device;
gfp_t old_gfp_mask;
@@ -169,29 +168,14 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
* of backing device, and the logical block size of loop is bigger than that of
* the backing device.
*/
-static bool lo_bdev_can_use_dio(struct loop_device *lo,
- struct block_device *backing_bdev)
-{
- unsigned int sb_bsize = bdev_logical_block_size(backing_bdev);
-
- if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
- return false;
- if (lo->lo_offset & (sb_bsize - 1))
- return false;
- return true;
-}
-
static bool lo_can_use_dio(struct loop_device *lo)
{
- struct inode *inode = lo->lo_backing_file->f_mapping->host;
-
if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT))
return false;
-
- if (S_ISBLK(inode->i_mode))
- return lo_bdev_can_use_dio(lo, I_BDEV(inode));
- if (inode->i_sb->s_bdev)
- return lo_bdev_can_use_dio(lo, inode->i_sb->s_bdev);
+ if (queue_logical_block_size(lo->lo_queue) < lo->lo_min_dio_size)
+ return false;
+ if (lo->lo_offset & (lo->lo_min_dio_size - 1))
+ return false;
return true;
}
@@ -205,20 +189,12 @@ static bool lo_can_use_dio(struct loop_device *lo)
*/
static inline void loop_update_dio(struct loop_device *lo)
{
- bool dio_in_use = lo->lo_flags & LO_FLAGS_DIRECT_IO;
-
lockdep_assert_held(&lo->lo_mutex);
WARN_ON_ONCE(lo->lo_state == Lo_bound &&
lo->lo_queue->mq_freeze_depth == 0);
- if (lo->lo_backing_file->f_flags & O_DIRECT)
- lo->lo_flags |= LO_FLAGS_DIRECT_IO;
if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo))
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
-
- /* flush dirty pages before starting to issue direct I/O */
- if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !dio_in_use)
- vfs_fsync(lo->lo_backing_file, 0);
}
/**
@@ -541,6 +517,28 @@ static void loop_reread_partitions(struct loop_device *lo)
__func__, lo->lo_number, lo->lo_file_name, rc);
}
+static unsigned int loop_query_min_dio_size(struct loop_device *lo)
+{
+ struct file *file = lo->lo_backing_file;
+ struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev;
+ struct kstat st;
+
+ /*
+ * Use the minimal dio alignment of the file system if provided.
+ */
+ if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) &&
+ (st.result_mask & STATX_DIOALIGN))
+ return st.dio_offset_align;
+
+ /*
+ * In a perfect world this wouldn't be needed, but as of Linux 6.13 only
+ * a handful of file systems support the STATX_DIOALIGN flag.
+ */
+ if (sb_bdev)
+ return bdev_logical_block_size(sb_bdev);
+ return SECTOR_SIZE;
+}
+
static inline int is_loop_device(struct file *file)
{
struct inode *i = file->f_mapping->host;
@@ -573,6 +571,17 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
return 0;
}
+static void loop_assign_backing_file(struct loop_device *lo, struct file *file)
+{
+ lo->lo_backing_file = file;
+ lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+ mapping_set_gfp_mask(file->f_mapping,
+ lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS));
+ if (lo->lo_backing_file->f_flags & O_DIRECT)
+ lo->lo_flags |= LO_FLAGS_DIRECT_IO;
+ lo->lo_min_dio_size = loop_query_min_dio_size(lo);
+}
+
/*
* loop_change_fd switched the backing store of a loopback device to
* a new file. This is useful for operating system installers to free up
@@ -622,14 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
goto out_err;
+ /*
+ * We might switch to direct I/O mode for the loop device, write back
+ * all dirty data the page cache now that so that the individual I/O
+ * operations don't have to do that.
+ */
+ vfs_fsync(file, 0);
+
/* and ... switch */
disk_force_media_change(lo->lo_disk);
memflags = blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
- lo->lo_backing_file = file;
- lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
- mapping_set_gfp_mask(file->f_mapping,
- lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_assign_backing_file(lo, file);
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue, memflags);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
@@ -971,12 +984,11 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static unsigned int loop_default_blocksize(struct loop_device *lo,
- struct block_device *backing_bdev)
+static unsigned int loop_default_blocksize(struct loop_device *lo)
{
- /* In case of direct I/O, match underlying block size */
- if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
- return bdev_logical_block_size(backing_bdev);
+ /* In case of direct I/O, match underlying minimum I/O size */
+ if (lo->lo_flags & LO_FLAGS_DIRECT_IO)
+ return lo->lo_min_dio_size;
return SECTOR_SIZE;
}
@@ -994,7 +1006,7 @@ static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
backing_bdev = inode->i_sb->s_bdev;
if (!bsize)
- bsize = loop_default_blocksize(lo, backing_bdev);
+ bsize = loop_default_blocksize(lo);
loop_get_discard_config(lo, &granularity, &max_discard_sectors);
@@ -1019,7 +1031,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
const struct loop_config *config)
{
struct file *file = fget(config->fd);
- struct address_space *mapping;
struct queue_limits lim;
int error;
loff_t size;
@@ -1055,8 +1066,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
if (error)
goto out_unlock;
- mapping = file->f_mapping;
-
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
goto out_unlock;
@@ -1088,9 +1097,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->lo_device = bdev;
- lo->lo_backing_file = file;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_assign_backing_file(lo, file);
lim = queue_limits_start_update(lo->lo_queue);
loop_update_limits(lo, &lim, config->block_size);
@@ -1099,6 +1106,13 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
if (error)
goto out_unlock;
+ /*
+ * We might switch to direct I/O mode for the loop device, write back
+ * all dirty data the page cache now that so that the individual I/O
+ * operations don't have to do that.
+ */
+ vfs_fsync(file, 0);
+
loop_update_dio(lo);
loop_sysfs_init(lo);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 95361099a2dc..0d619df03fa9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2056,7 +2056,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
unsigned int nents;
/* Map the scatter list for DMA access */
- nents = blk_rq_map_sg(hctx->queue, rq, command->sg);
+ nents = blk_rq_map_sg(rq, command->sg);
nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
prefetch(&port->flags);
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index da1ecbf988b8..3bb9cee0a9b5 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -473,6 +473,8 @@ NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
NULLB_DEVICE_ATTR(fua, bool, NULL);
NULLB_DEVICE_ATTR(rotational, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_once, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -559,14 +561,14 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item,
goto out;
/* enable badblocks */
cmpxchg(&t_dev->badblocks.shift, -1, 0);
- if (buf[0] == '+')
- ret = badblocks_set(&t_dev->badblocks, start,
- end - start + 1, 1);
- else
- ret = badblocks_clear(&t_dev->badblocks, start,
- end - start + 1);
- if (ret == 0)
+ if (buf[0] == '+') {
+ if (badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1))
+ ret = count;
+ } else if (badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1)) {
ret = count;
+ }
out:
kfree(orig);
return ret;
@@ -592,41 +594,43 @@ static ssize_t nullb_device_zone_offline_store(struct config_item *item,
CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
static struct configfs_attribute *nullb_device_attrs[] = {
- &nullb_device_attr_size,
+ &nullb_device_attr_badblocks,
+ &nullb_device_attr_badblocks_once,
+ &nullb_device_attr_badblocks_partial_io,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_cache_size,
&nullb_device_attr_completion_nsec,
- &nullb_device_attr_submit_queues,
- &nullb_device_attr_poll_queues,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_fua,
&nullb_device_attr_home_node,
- &nullb_device_attr_queue_mode,
- &nullb_device_attr_blocksize,
- &nullb_device_attr_max_sectors,
- &nullb_device_attr_irqmode,
&nullb_device_attr_hw_queue_depth,
&nullb_device_attr_index,
- &nullb_device_attr_blocking,
- &nullb_device_attr_use_per_node_hctx,
- &nullb_device_attr_power,
- &nullb_device_attr_memory_backed,
- &nullb_device_attr_discard,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_max_sectors,
&nullb_device_attr_mbps,
- &nullb_device_attr_cache_size,
- &nullb_device_attr_badblocks,
- &nullb_device_attr_zoned,
- &nullb_device_attr_zone_size,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_no_sched,
+ &nullb_device_attr_poll_queues,
+ &nullb_device_attr_power,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_rotational,
+ &nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_shared_tags,
+ &nullb_device_attr_size,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_virt_boundary,
+ &nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_capacity,
- &nullb_device_attr_zone_nr_conv,
- &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_zone_max_active,
- &nullb_device_attr_zone_append_max_sectors,
- &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_offline,
- &nullb_device_attr_zone_full,
- &nullb_device_attr_virt_boundary,
- &nullb_device_attr_no_sched,
- &nullb_device_attr_shared_tags,
- &nullb_device_attr_shared_tag_bitmap,
- &nullb_device_attr_fua,
- &nullb_device_attr_rotational,
+ &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_size,
+ &nullb_device_attr_zoned,
NULL,
};
@@ -704,16 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE,
- "badblocks,blocking,blocksize,cache_size,fua,"
- "completion_nsec,discard,home_node,hw_queue_depth,"
- "irqmode,max_sectors,mbps,memory_backed,no_sched,"
- "poll_queues,power,queue_mode,shared_tag_bitmap,"
- "shared_tags,size,submit_queues,use_per_node_hctx,"
- "virt_boundary,zoned,zone_capacity,zone_max_active,"
- "zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors,zone_full,"
- "rotational\n");
+
+ struct configfs_attribute **entry;
+ char delimiter = ',';
+ size_t left = PAGE_SIZE;
+ size_t written = 0;
+ int ret;
+
+ for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) {
+ if (!*(entry + 1))
+ delimiter = '\n';
+ ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name,
+ delimiter);
+ if (ret >= left) {
+ WARN_ONCE(1, "Too many null_blk features to print\n");
+ memzero_explicit(page, PAGE_SIZE);
+ return -ENOBUFS;
+ }
+ left -= ret;
+ written += ret;
+ }
+
+ return written;
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -1249,25 +1265,37 @@ static int null_transfer(struct nullb *nullb, struct page *page,
return err;
}
-static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
+/*
+ * Transfer data for the given request. The transfer size is capped with the
+ * nr_sectors argument.
+ */
+static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
+ sector_t nr_sectors)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
int err = 0;
unsigned int len;
sector_t sector = blk_rq_pos(rq);
+ unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
+ unsigned int transferred_bytes = 0;
struct req_iterator iter;
struct bio_vec bvec;
spin_lock_irq(&nullb->lock);
rq_for_each_segment(bvec, rq, iter) {
len = bvec.bv_len;
+ if (transferred_bytes + len > max_bytes)
+ len = max_bytes - transferred_bytes;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
rq->cmd_flags & REQ_FUA);
if (err)
break;
sector += len >> SECTOR_SHIFT;
+ transferred_bytes += len;
+ if (transferred_bytes >= max_bytes)
+ break;
}
spin_unlock_irq(&nullb->lock);
@@ -1295,31 +1323,51 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
return sts;
}
-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
- sector_t sector,
- sector_t nr_sectors)
+/*
+ * Check if the command should fail for the badblocks. If so, return
+ * BLK_STS_IOERR and return number of partial I/O sectors to be written or read,
+ * which may be less than the requested number of sectors.
+ *
+ * @cmd: The command to handle.
+ * @sector: The start sector for I/O.
+ * @nr_sectors: Specifies number of sectors to write or read, and returns the
+ * number of sectors to be written or read.
+ */
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int *nr_sectors)
{
struct badblocks *bb = &cmd->nq->dev->badblocks;
- sector_t first_bad;
- int bad_sectors;
+ struct nullb_device *dev = cmd->nq->dev;
+ unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
+ sector_t first_bad, bad_sectors;
+ unsigned int partial_io_sectors = 0;
- if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
- return BLK_STS_IOERR;
+ if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors))
+ return BLK_STS_OK;
- return BLK_STS_OK;
+ if (cmd->nq->dev->badblocks_once)
+ badblocks_clear(bb, first_bad, bad_sectors);
+
+ if (cmd->nq->dev->badblocks_partial_io) {
+ if (!IS_ALIGNED(first_bad, block_sectors))
+ first_bad = ALIGN_DOWN(first_bad, block_sectors);
+ if (sector < first_bad)
+ partial_io_sectors = first_bad - sector;
+ }
+ *nr_sectors = partial_io_sectors;
+
+ return BLK_STS_IOERR;
}
-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
- enum req_op op,
- sector_t sector,
- sector_t nr_sectors)
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
- return null_handle_rq(cmd);
+ return null_handle_data_transfer(cmd, nr_sectors);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
@@ -1366,18 +1414,19 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
+ blk_status_t badblocks_ret = BLK_STS_OK;
blk_status_t ret;
- if (dev->badblocks.shift != -1) {
- ret = null_handle_badblocks(cmd, sector, nr_sectors);
+ if (dev->badblocks.shift != -1)
+ badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+
+ if (dev->memory_backed && nr_sectors) {
+ ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
if (ret != BLK_STS_OK)
return ret;
}
- if (dev->memory_backed)
- return null_handle_memory_backed(cmd, op, sector, nr_sectors);
-
- return BLK_STS_OK;
+ return badblocks_ret;
}
static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 6f9fe6171087..7bb6128dbaaf 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -63,6 +63,8 @@ struct nullb_device {
unsigned long flags; /* device flags */
unsigned int curr_cache;
struct badblocks badblocks;
+ bool badblocks_once;
+ bool badblocks_partial_io;
unsigned int nr_zones;
unsigned int nr_zones_imp_open;
@@ -131,6 +133,10 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
sector_t nr_sectors);
blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors);
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int *nr_sectors);
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors);
#ifdef CONFIG_BLK_DEV_ZONED
int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 0d5f9bf95229..4e5728f45989 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -353,6 +353,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
struct nullb_zone *zone = &dev->zones[zno];
+ blk_status_t badblocks_ret = BLK_STS_OK;
blk_status_t ret;
trace_nullb_zone_op(cmd, zno, zone->cond);
@@ -412,9 +413,20 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
zone->cond = BLK_ZONE_COND_IMP_OPEN;
}
- ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
- if (ret != BLK_STS_OK)
- goto unlock_zone;
+ if (dev->badblocks.shift != -1) {
+ badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+ if (badblocks_ret != BLK_STS_OK && !nr_sectors) {
+ ret = badblocks_ret;
+ goto unlock_zone;
+ }
+ }
+
+ if (dev->memory_backed) {
+ ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector,
+ nr_sectors);
+ if (ret != BLK_STS_OK)
+ goto unlock_zone;
+ }
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity) {
@@ -429,7 +441,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
zone->cond = BLK_ZONE_COND_FULL;
}
- ret = BLK_STS_OK;
+ ret = badblocks_ret;
unlock_zone:
null_unlock_zone(dev, zone);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 82467ecde7ec..15627417f12e 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1010,7 +1010,7 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev,
* See queue limits.
*/
if ((req_op(rq) != REQ_OP_DISCARD) && (req_op(rq) != REQ_OP_WRITE_ZEROES))
- sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl);
+ sg_cnt = blk_rq_map_sg(rq, iu->sgt.sgl);
if (sg_cnt == 0)
sg_mark_end(&iu->sgt.sgl[0]);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 282f81616a78..2b33fb5b949b 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -485,7 +485,7 @@ static int __send_request(struct request *req)
}
sg_init_table(sg, port->ring_cookies);
- nsg = blk_rq_map_sg(req->q, req, sg);
+ nsg = blk_rq_map_sg(req, sg);
len = 0;
for (i = 0; i < nsg; i++)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6d7b26ab434f..c060da409ed8 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -73,11 +73,10 @@
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL \
(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
- UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
+ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \
+ UBLK_PARAM_TYPE_DMA_ALIGN)
struct ublk_rq_data {
- struct llist_node node;
-
struct kref ref;
};
@@ -144,8 +143,6 @@ struct ublk_queue {
struct task_struct *ubq_daemon;
char *io_cmd_buf;
- struct llist_head io_cmds;
-
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
bool force_abort;
@@ -494,15 +491,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
static DEFINE_MUTEX(ublk_ctl_mutex);
+
+#define UBLK_MAX_UBLKS UBLK_MINORS
+
/*
- * Max ublk devices allowed to add
+ * Max unprivileged ublk devices allowed to add
*
* It can be extended to one per-user limit in future or even controlled
* by cgroup.
*/
-#define UBLK_MAX_UBLKS UBLK_MINORS
-static unsigned int ublks_max = 64;
-static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
+static unsigned int unprivileged_ublks_max = 64;
+static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */
static struct miscdevice ublk_misc;
@@ -573,6 +572,16 @@ static int ublk_validate_params(const struct ublk_device *ub)
else if (ublk_dev_is_zoned(ub))
return -EINVAL;
+ if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) {
+ const struct ublk_param_dma_align *p = &ub->params.dma;
+
+ if (p->alignment >= PAGE_SIZE)
+ return -EINVAL;
+
+ if (!is_power_of_2(p->alignment + 1))
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1100,7 +1109,7 @@ static void ublk_complete_rq(struct kref *ref)
}
/*
- * Since __ublk_rq_task_work always fails requests immediately during
+ * Since ublk_rq_task_work_cb always fails requests immediately during
* exiting, __ublk_fail_req() is only called from abort context during
* exiting. So lock is unnecessary.
*
@@ -1146,11 +1155,14 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
-static inline void __ublk_rq_task_work(struct request *req,
- unsigned issue_flags)
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
- struct ublk_queue *ubq = req->mq_hctx->driver_data;
- int tag = req->tag;
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+ struct ublk_queue *ubq = pdu->ubq;
+ int tag = pdu->tag;
+ struct request *req = blk_mq_tag_to_rq(
+ ubq->dev->tag_set.tags[ubq->q_id], tag);
struct ublk_io *io = &ubq->ios[tag];
unsigned int mapped_bytes;
@@ -1225,34 +1237,11 @@ static inline void __ublk_rq_task_work(struct request *req,
ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
}
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
- unsigned issue_flags)
-{
- struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
- struct ublk_rq_data *data, *tmp;
-
- io_cmds = llist_reverse_order(io_cmds);
- llist_for_each_entry_safe(data, tmp, io_cmds, node)
- __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
-}
-
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
-{
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
- struct ublk_queue *ubq = pdu->ubq;
-
- ublk_forward_io_cmds(ubq, issue_flags);
-}
-
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+ struct ublk_io *io = &ubq->ios[rq->tag];
- if (llist_add(&data->node, &ubq->io_cmds)) {
- struct ublk_io *io = &ubq->ios[rq->tag];
-
- io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
- }
+ io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
}
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
@@ -1445,7 +1434,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
struct request *rq;
/*
- * Either we fail the request or ublk_rq_task_work_fn
+ * Either we fail the request or ublk_rq_task_work_cb
* will do it
*/
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
@@ -1911,10 +1900,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
return -EIOCBQUEUED;
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
__func__, cmd_op, tag, ret, io->flags);
- return -EIOCBQUEUED;
+ return ret;
}
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
@@ -1970,7 +1958,10 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
- ublk_ch_uring_cmd_local(cmd, issue_flags);
+ int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
+
+ if (ret != -EIOCBQUEUED)
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -2235,7 +2226,8 @@ static int ublk_add_chdev(struct ublk_device *ub)
if (ret)
goto fail;
- ublks_added++;
+ if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV)
+ unprivileged_ublks_added++;
return 0;
fail:
put_device(dev);
@@ -2264,11 +2256,16 @@ static int ublk_add_tag_set(struct ublk_device *ub)
static void ublk_remove(struct ublk_device *ub)
{
+ bool unprivileged;
+
ublk_stop_dev(ub);
cancel_work_sync(&ub->nosrv_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
+ unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
ublk_put_device(ub);
- ublks_added--;
+
+ if (unprivileged)
+ unprivileged_ublks_added--;
}
static struct ublk_device *ublk_get_device_from_id(int idx)
@@ -2343,6 +2340,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
lim.features |= BLK_FEAT_ROTATIONAL;
+ if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
+ lim.dma_alignment = ub->params.dma.alignment;
+
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
@@ -2530,7 +2530,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
return ret;
ret = -EACCES;
- if (ublks_added >= ublks_max)
+ if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) &&
+ unprivileged_ublks_added >= unprivileged_ublks_max)
goto out_unlock;
ret = -ENOMEM;
@@ -3101,10 +3102,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (ub)
ublk_put_device(ub);
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
- return -EIOCBQUEUED;
+ return ret;
}
static const struct file_operations ublk_ctl_fops = {
@@ -3168,23 +3168,26 @@ static void __exit ublk_exit(void)
module_init(ublk_init);
module_exit(ublk_exit);
-static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
+static int ublk_set_max_unprivileged_ublks(const char *buf,
+ const struct kernel_param *kp)
{
return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
}
-static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
+static int ublk_get_max_unprivileged_ublks(char *buf,
+ const struct kernel_param *kp)
{
- return sysfs_emit(buf, "%u\n", ublks_max);
+ return sysfs_emit(buf, "%u\n", unprivileged_ublks_max);
}
-static const struct kernel_param_ops ublk_max_ublks_ops = {
- .set = ublk_set_max_ublks,
- .get = ublk_get_max_ublks,
+static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = {
+ .set = ublk_set_max_unprivileged_ublks,
+ .get = ublk_get_max_unprivileged_ublks,
};
-module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
-MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
+module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops,
+ &unprivileged_ublks_max, 0644);
+MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add(default: 64)");
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
MODULE_DESCRIPTION("Userspace block device");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 91cde76a4b3e..7cffea01d868 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -226,7 +226,7 @@ static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
if (unlikely(err))
return -ENOMEM;
- return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
+ return blk_rq_map_sg(req, vbr->sg_table.sgl);
}
static void virtblk_cleanup_cmd(struct request *req)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index edcd08a9dcef..5babe575c288 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -751,7 +751,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
id = blkif_ring_get_request(rinfo, req, &final_ring_req);
ring_req = &rinfo->shadow[id].req;
- num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
+ num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg);
num_grant = 0;
/* Calculate the number of grant used */
for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)