| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-26 18:08:55 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-26 18:08:55 -0700 |
| commit | 9b960d8cd6f712cb2c03e2bdd4d5ca058238037f (patch) | |
| tree | a1381af6c79626c0a28679f804477b43b7c91565 /drivers/block | |
| parent | Merge tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux (diff) | |
| parent | Merge tag 'nvme-6.15-2025-03-20' of git://git.infradead.org/nvme into for-6.1... (diff) | |
Merge tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- Fixes for integrity handling
- NVMe pull request via Keith:
- Secure concatenation for TCP transport (Hannes)
- Multipath sysfs visibility (Nilay)
- Various cleanups (Qasim, Baruch, Wang, Chen, Mike, Damien, Li)
- Correct use of 64-bit BARs for pci-epf target (Niklas)
- Socket fix for selinux when used in containers (Peijie)
- MD pull request via Yu:
- fix recovery can preempt resync (Li Nan)
- fix md-bitmap IO limit (Su Yue)
- fix raid10 discard with REQ_NOWAIT (Xiao Ni)
- fix raid1 memory leak (Zheng Qixing)
- fix mddev uaf (Yu Kuai)
- fix raid1,raid10 IO flags (Yu Kuai)
- some refactor and cleanup (Yu Kuai)
- Series cleaning up and fixing bugs in the bad block handling code
- Improve support for write failure simulation in null_blk (a configfs usage sketch follows this list)
- Various lock ordering fixes
- Fixes for locking for debugfs attributes
- Various ublk related fixes and improvements
- Cleanups for blk-rq-qos wait handling
- blk-throttle fixes
- Fixes for loop dio and sync handling
- Fixes and cleanups for the auto-PI code
- Block side support for hardware encryption keys in blk-crypto
- Various cleanups and fixes
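
The null_blk write-failure item above adds two configfs booleans, badblocks_once and badblocks_partial_io, next to the existing badblocks range attribute (see the null_blk hunks in the diff further down). The following is a hedged userspace sketch of wiring them up, not part of the merge: the nullb0 device name and the /sys/kernel/config mount point are assumptions, while the attribute names and the "+start-end" badblocks syntax come from the diff.

```c
/*
 * Hedged sketch (not part of the merge): exercising the new null_blk
 * write-failure knobs from user space via the existing nullb configfs
 * layout.  Assumptions: null_blk is loaded with nr_devices=0, configfs is
 * mounted at /sys/kernel/config, and the device is named nullb0.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#define NULLB "/sys/kernel/config/nullb/nullb0"

/* Write a value into one configfs attribute of the nullb0 group. */
static int put(const char *attr, const char *val)
{
	char path[256];
	int fd, ret = 0;

	snprintf(path, sizeof(path), NULLB "/%s", attr);
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		ret = -1;
	}
	close(fd);
	return ret;
}

int main(void)
{
	/* mkdir on the configfs directory creates the device item */
	if (mkdir(NULLB, 0755) && errno != EEXIST) {
		perror(NULLB);
		return 1;
	}
	put("memory_backed", "1");        /* back the device with memory so data can be re-read */
	put("badblocks_once", "1");       /* a bad range clears itself after failing once */
	put("badblocks_partial_io", "1"); /* sectors ahead of the bad range still transfer */
	put("badblocks", "+8-15");        /* mark sectors 8..15 bad */
	put("power", "1");                /* instantiate /dev/nullb0 */
	return 0;
}
```

With that configuration, a request covering sectors 0-15 of /dev/nullb0 should transfer the leading good sectors, fail once with an I/O error for the bad range, and succeed if retried, since badblocks_once clears the range after the first hit.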
* tag 'for-6.15/block-20250322' of git://git.kernel.dk/linux: (105 commits)
nvmet: replace max(a, min(b, c)) by clamp(val, lo, hi)
nvme-tcp: fix selinux denied when calling sock_sendmsg
nvmet: pci-epf: Always configure BAR0 as 64-bit
nvmet: Remove duplicate uuid_copy
nvme: zns: Simplify nvme_zone_parse_entry()
nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls
nvmet-fc: Remove unused functions
nvme-pci: remove stale comment
nvme-fc: Utilise min3() to simplify queue count calculation
nvme-multipath: Add visibility for queue-depth io-policy
nvme-multipath: Add visibility for numa io-policy
nvme-multipath: Add visibility for round-robin io-policy
nvmet: add tls_concat and tls_key debugfs entries
nvmet-tcp: support secure channel concatenation
nvmet: Add 'sq' argument to alloc_ctrl_args
nvme-fabrics: reset admin connection for secure concatenation
nvme-tcp: request secure channel concatenation
nvme-keyring: add nvme_tls_psk_refresh()
nvme: add nvme_auth_derive_tls_psk()
nvme: add nvme_auth_generate_digest()
...
Diffstat (limited to 'drivers/block')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | drivers/block/loop.c | 106 |
| -rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 2 |
| -rw-r--r-- | drivers/block/null_blk/main.c | 177 |
| -rw-r--r-- | drivers/block/null_blk/null_blk.h | 6 |
| -rw-r--r-- | drivers/block/null_blk/zoned.c | 20 |
| -rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 2 |
| -rw-r--r-- | drivers/block/sunvdc.c | 2 |
| -rw-r--r-- | drivers/block/ublk_drv.c | 115 |
| -rw-r--r-- | drivers/block/virtio_blk.c | 2 |
| -rw-r--r-- | drivers/block/xen-blkfront.c | 2 |
10 files changed, 259 insertions, 175 deletions
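
The loop.c changes in the diff that follows replace the old per-block-device direct-I/O check with a minimum direct-I/O size cached at bind time, queried by the new loop_query_min_dio_size() helper via STATX_DIOALIGN with a logical-block-size fallback. For reference, here is a hedged userspace sketch of the same query; it assumes the toolchain exposes statx(2) and the STATX_DIOALIGN fields (Linux 6.1+ headers, glibc 2.28+), and is not code from this series.

```c
/*
 * Hedged sketch: query the minimum direct-I/O alignment of a backing file
 * from user space, analogous to what loop_query_min_dio_size() does in the
 * kernel.  Only a handful of filesystems fill in STATX_DIOALIGN, hence the
 * fallback message.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct statx st;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <backing-file>\n", argv[0]);
		return 1;
	}
	if (statx(AT_FDCWD, argv[1], 0, STATX_DIOALIGN, &st)) {
		perror("statx");
		return 1;
	}
	if (st.stx_mask & STATX_DIOALIGN)
		printf("dio offset align: %u bytes, dio memory align: %u bytes\n",
		       st.stx_dio_offset_align, st.stx_dio_mem_align);
	else
		/* same case the in-kernel helper covers with its fallback */
		printf("STATX_DIOALIGN not reported; fall back to the logical block size\n");
	return 0;
}
```

The in-kernel helper applies the same order of preference: the filesystem-reported dio_offset_align when STATX_DIOALIGN is filled in, otherwise the logical block size of the filesystem's backing device, otherwise SECTOR_SIZE.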
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c05fe27a96b6..674527d770dc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -45,8 +45,6 @@ enum { Lo_deleting, }; -struct loop_func_table; - struct loop_device { int lo_number; loff_t lo_offset; @@ -54,7 +52,8 @@ struct loop_device { int lo_flags; char lo_file_name[LO_NAME_SIZE]; - struct file * lo_backing_file; + struct file *lo_backing_file; + unsigned int lo_min_dio_size; struct block_device *lo_device; gfp_t old_gfp_mask; @@ -169,29 +168,14 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file) * of backing device, and the logical block size of loop is bigger than that of * the backing device. */ -static bool lo_bdev_can_use_dio(struct loop_device *lo, - struct block_device *backing_bdev) -{ - unsigned int sb_bsize = bdev_logical_block_size(backing_bdev); - - if (queue_logical_block_size(lo->lo_queue) < sb_bsize) - return false; - if (lo->lo_offset & (sb_bsize - 1)) - return false; - return true; -} - static bool lo_can_use_dio(struct loop_device *lo) { - struct inode *inode = lo->lo_backing_file->f_mapping->host; - if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT)) return false; - - if (S_ISBLK(inode->i_mode)) - return lo_bdev_can_use_dio(lo, I_BDEV(inode)); - if (inode->i_sb->s_bdev) - return lo_bdev_can_use_dio(lo, inode->i_sb->s_bdev); + if (queue_logical_block_size(lo->lo_queue) < lo->lo_min_dio_size) + return false; + if (lo->lo_offset & (lo->lo_min_dio_size - 1)) + return false; return true; } @@ -205,20 +189,12 @@ static bool lo_can_use_dio(struct loop_device *lo) */ static inline void loop_update_dio(struct loop_device *lo) { - bool dio_in_use = lo->lo_flags & LO_FLAGS_DIRECT_IO; - lockdep_assert_held(&lo->lo_mutex); WARN_ON_ONCE(lo->lo_state == Lo_bound && lo->lo_queue->mq_freeze_depth == 0); - if (lo->lo_backing_file->f_flags & O_DIRECT) - lo->lo_flags |= LO_FLAGS_DIRECT_IO; if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo)) lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; - - /* flush dirty pages before starting to issue direct I/O */ - if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !dio_in_use) - vfs_fsync(lo->lo_backing_file, 0); } /** @@ -541,6 +517,28 @@ static void loop_reread_partitions(struct loop_device *lo) __func__, lo->lo_number, lo->lo_file_name, rc); } +static unsigned int loop_query_min_dio_size(struct loop_device *lo) +{ + struct file *file = lo->lo_backing_file; + struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev; + struct kstat st; + + /* + * Use the minimal dio alignment of the file system if provided. + */ + if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) && + (st.result_mask & STATX_DIOALIGN)) + return st.dio_offset_align; + + /* + * In a perfect world this wouldn't be needed, but as of Linux 6.13 only + * a handful of file systems support the STATX_DIOALIGN flag. 
+ */ + if (sb_bdev) + return bdev_logical_block_size(sb_bdev); + return SECTOR_SIZE; +} + static inline int is_loop_device(struct file *file) { struct inode *i = file->f_mapping->host; @@ -573,6 +571,17 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) return 0; } +static void loop_assign_backing_file(struct loop_device *lo, struct file *file) +{ + lo->lo_backing_file = file; + lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); + mapping_set_gfp_mask(file->f_mapping, + lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS)); + if (lo->lo_backing_file->f_flags & O_DIRECT) + lo->lo_flags |= LO_FLAGS_DIRECT_IO; + lo->lo_min_dio_size = loop_query_min_dio_size(lo); +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -622,14 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) goto out_err; + /* + * We might switch to direct I/O mode for the loop device, write back + * all dirty data the page cache now that so that the individual I/O + * operations don't have to do that. + */ + vfs_fsync(file, 0); + /* and ... switch */ disk_force_media_change(lo->lo_disk); memflags = blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); - lo->lo_backing_file = file; - lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); - mapping_set_gfp_mask(file->f_mapping, - lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_assign_backing_file(lo, file); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue, memflags); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; @@ -971,12 +984,11 @@ loop_set_status_from_info(struct loop_device *lo, return 0; } -static unsigned int loop_default_blocksize(struct loop_device *lo, - struct block_device *backing_bdev) +static unsigned int loop_default_blocksize(struct loop_device *lo) { - /* In case of direct I/O, match underlying block size */ - if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev) - return bdev_logical_block_size(backing_bdev); + /* In case of direct I/O, match underlying minimum I/O size */ + if (lo->lo_flags & LO_FLAGS_DIRECT_IO) + return lo->lo_min_dio_size; return SECTOR_SIZE; } @@ -994,7 +1006,7 @@ static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim, backing_bdev = inode->i_sb->s_bdev; if (!bsize) - bsize = loop_default_blocksize(lo, backing_bdev); + bsize = loop_default_blocksize(lo); loop_get_discard_config(lo, &granularity, &max_discard_sectors); @@ -1019,7 +1031,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, const struct loop_config *config) { struct file *file = fget(config->fd); - struct address_space *mapping; struct queue_limits lim; int error; loff_t size; @@ -1055,8 +1066,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (error) goto out_unlock; - mapping = file->f_mapping; - if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) { error = -EINVAL; goto out_unlock; @@ -1088,9 +1097,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->lo_device = bdev; - lo->lo_backing_file = file; - lo->old_gfp_mask = mapping_gfp_mask(mapping); - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_assign_backing_file(lo, file); lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, 
config->block_size); @@ -1099,6 +1106,13 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (error) goto out_unlock; + /* + * We might switch to direct I/O mode for the loop device, write back + * all dirty data the page cache now that so that the individual I/O + * operations don't have to do that. + */ + vfs_fsync(file, 0); + loop_update_dio(lo); loop_sysfs_init(lo); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 95361099a2dc..0d619df03fa9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2056,7 +2056,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, unsigned int nents; /* Map the scatter list for DMA access */ - nents = blk_rq_map_sg(hctx->queue, rq, command->sg); + nents = blk_rq_map_sg(rq, command->sg); nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); prefetch(&port->flags); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index da1ecbf988b8..3bb9cee0a9b5 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -473,6 +473,8 @@ NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); NULLB_DEVICE_ATTR(fua, bool, NULL); NULLB_DEVICE_ATTR(rotational, bool, NULL); +NULLB_DEVICE_ATTR(badblocks_once, bool, NULL); +NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -559,14 +561,14 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item, goto out; /* enable badblocks */ cmpxchg(&t_dev->badblocks.shift, -1, 0); - if (buf[0] == '+') - ret = badblocks_set(&t_dev->badblocks, start, - end - start + 1, 1); - else - ret = badblocks_clear(&t_dev->badblocks, start, - end - start + 1); - if (ret == 0) + if (buf[0] == '+') { + if (badblocks_set(&t_dev->badblocks, start, + end - start + 1, 1)) + ret = count; + } else if (badblocks_clear(&t_dev->badblocks, start, + end - start + 1)) { ret = count; + } out: kfree(orig); return ret; @@ -592,41 +594,43 @@ static ssize_t nullb_device_zone_offline_store(struct config_item *item, CONFIGFS_ATTR_WO(nullb_device_, zone_offline); static struct configfs_attribute *nullb_device_attrs[] = { - &nullb_device_attr_size, + &nullb_device_attr_badblocks, + &nullb_device_attr_badblocks_once, + &nullb_device_attr_badblocks_partial_io, + &nullb_device_attr_blocking, + &nullb_device_attr_blocksize, + &nullb_device_attr_cache_size, &nullb_device_attr_completion_nsec, - &nullb_device_attr_submit_queues, - &nullb_device_attr_poll_queues, + &nullb_device_attr_discard, + &nullb_device_attr_fua, &nullb_device_attr_home_node, - &nullb_device_attr_queue_mode, - &nullb_device_attr_blocksize, - &nullb_device_attr_max_sectors, - &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, - &nullb_device_attr_blocking, - &nullb_device_attr_use_per_node_hctx, - &nullb_device_attr_power, - &nullb_device_attr_memory_backed, - &nullb_device_attr_discard, + &nullb_device_attr_irqmode, + &nullb_device_attr_max_sectors, &nullb_device_attr_mbps, - &nullb_device_attr_cache_size, - &nullb_device_attr_badblocks, - &nullb_device_attr_zoned, - &nullb_device_attr_zone_size, + &nullb_device_attr_memory_backed, + &nullb_device_attr_no_sched, + &nullb_device_attr_poll_queues, + &nullb_device_attr_power, + &nullb_device_attr_queue_mode, + &nullb_device_attr_rotational, + &nullb_device_attr_shared_tag_bitmap, + &nullb_device_attr_shared_tags, + 
&nullb_device_attr_size, + &nullb_device_attr_submit_queues, + &nullb_device_attr_use_per_node_hctx, + &nullb_device_attr_virt_boundary, + &nullb_device_attr_zone_append_max_sectors, &nullb_device_attr_zone_capacity, - &nullb_device_attr_zone_nr_conv, - &nullb_device_attr_zone_max_open, + &nullb_device_attr_zone_full, &nullb_device_attr_zone_max_active, - &nullb_device_attr_zone_append_max_sectors, - &nullb_device_attr_zone_readonly, + &nullb_device_attr_zone_max_open, + &nullb_device_attr_zone_nr_conv, &nullb_device_attr_zone_offline, - &nullb_device_attr_zone_full, - &nullb_device_attr_virt_boundary, - &nullb_device_attr_no_sched, - &nullb_device_attr_shared_tags, - &nullb_device_attr_shared_tag_bitmap, - &nullb_device_attr_fua, - &nullb_device_attr_rotational, + &nullb_device_attr_zone_readonly, + &nullb_device_attr_zone_size, + &nullb_device_attr_zoned, NULL, }; @@ -704,16 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, - "badblocks,blocking,blocksize,cache_size,fua," - "completion_nsec,discard,home_node,hw_queue_depth," - "irqmode,max_sectors,mbps,memory_backed,no_sched," - "poll_queues,power,queue_mode,shared_tag_bitmap," - "shared_tags,size,submit_queues,use_per_node_hctx," - "virt_boundary,zoned,zone_capacity,zone_max_active," - "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," - "zone_size,zone_append_max_sectors,zone_full," - "rotational\n"); + + struct configfs_attribute **entry; + char delimiter = ','; + size_t left = PAGE_SIZE; + size_t written = 0; + int ret; + + for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) { + if (!*(entry + 1)) + delimiter = '\n'; + ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name, + delimiter); + if (ret >= left) { + WARN_ONCE(1, "Too many null_blk features to print\n"); + memzero_explicit(page, PAGE_SIZE); + return -ENOBUFS; + } + left -= ret; + written += ret; + } + + return written; } CONFIGFS_ATTR_RO(memb_group_, features); @@ -1249,25 +1265,37 @@ static int null_transfer(struct nullb *nullb, struct page *page, return err; } -static blk_status_t null_handle_rq(struct nullb_cmd *cmd) +/* + * Transfer data for the given request. The transfer size is capped with the + * nr_sectors argument. + */ +static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd, + sector_t nr_sectors) { struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; int err = 0; unsigned int len; sector_t sector = blk_rq_pos(rq); + unsigned int max_bytes = nr_sectors << SECTOR_SHIFT; + unsigned int transferred_bytes = 0; struct req_iterator iter; struct bio_vec bvec; spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; + if (transferred_bytes + len > max_bytes) + len = max_bytes - transferred_bytes; err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(req_op(rq)), sector, rq->cmd_flags & REQ_FUA); if (err) break; sector += len >> SECTOR_SHIFT; + transferred_bytes += len; + if (transferred_bytes >= max_bytes) + break; } spin_unlock_irq(&nullb->lock); @@ -1295,31 +1323,51 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) return sts; } -static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, - sector_t sector, - sector_t nr_sectors) +/* + * Check if the command should fail for the badblocks. 
If so, return + * BLK_STS_IOERR and return number of partial I/O sectors to be written or read, + * which may be less than the requested number of sectors. + * + * @cmd: The command to handle. + * @sector: The start sector for I/O. + * @nr_sectors: Specifies number of sectors to write or read, and returns the + * number of sectors to be written or read. + */ +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector, + unsigned int *nr_sectors) { struct badblocks *bb = &cmd->nq->dev->badblocks; - sector_t first_bad; - int bad_sectors; + struct nullb_device *dev = cmd->nq->dev; + unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT; + sector_t first_bad, bad_sectors; + unsigned int partial_io_sectors = 0; - if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors)) - return BLK_STS_IOERR; + if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors)) + return BLK_STS_OK; - return BLK_STS_OK; + if (cmd->nq->dev->badblocks_once) + badblocks_clear(bb, first_bad, bad_sectors); + + if (cmd->nq->dev->badblocks_partial_io) { + if (!IS_ALIGNED(first_bad, block_sectors)) + first_bad = ALIGN_DOWN(first_bad, block_sectors); + if (sector < first_bad) + partial_io_sectors = first_bad - sector; + } + *nr_sectors = partial_io_sectors; + + return BLK_STS_IOERR; } -static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, - enum req_op op, - sector_t sector, - sector_t nr_sectors) +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op, + sector_t sector, sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); - return null_handle_rq(cmd); + return null_handle_data_transfer(cmd, nr_sectors); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) @@ -1366,18 +1414,19 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, unsigned int nr_sectors) { struct nullb_device *dev = cmd->nq->dev; + blk_status_t badblocks_ret = BLK_STS_OK; blk_status_t ret; - if (dev->badblocks.shift != -1) { - ret = null_handle_badblocks(cmd, sector, nr_sectors); + if (dev->badblocks.shift != -1) + badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors); + + if (dev->memory_backed && nr_sectors) { + ret = null_handle_memory_backed(cmd, op, sector, nr_sectors); if (ret != BLK_STS_OK) return ret; } - if (dev->memory_backed) - return null_handle_memory_backed(cmd, op, sector, nr_sectors); - - return BLK_STS_OK; + return badblocks_ret; } static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 6f9fe6171087..7bb6128dbaaf 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -63,6 +63,8 @@ struct nullb_device { unsigned long flags; /* device flags */ unsigned int curr_cache; struct badblocks badblocks; + bool badblocks_once; + bool badblocks_partial_io; unsigned int nr_zones; unsigned int nr_zones_imp_open; @@ -131,6 +133,10 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector, sector_t nr_sectors); blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, unsigned int nr_sectors); +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector, + unsigned int *nr_sectors); +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op, + sector_t sector, sector_t nr_sectors); #ifdef CONFIG_BLK_DEV_ZONED int 
null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim); diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 0d5f9bf95229..4e5728f45989 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -353,6 +353,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, struct nullb_device *dev = cmd->nq->dev; unsigned int zno = null_zone_no(dev, sector); struct nullb_zone *zone = &dev->zones[zno]; + blk_status_t badblocks_ret = BLK_STS_OK; blk_status_t ret; trace_nullb_zone_op(cmd, zno, zone->cond); @@ -412,9 +413,20 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, zone->cond = BLK_ZONE_COND_IMP_OPEN; } - ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); - if (ret != BLK_STS_OK) - goto unlock_zone; + if (dev->badblocks.shift != -1) { + badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors); + if (badblocks_ret != BLK_STS_OK && !nr_sectors) { + ret = badblocks_ret; + goto unlock_zone; + } + } + + if (dev->memory_backed) { + ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector, + nr_sectors); + if (ret != BLK_STS_OK) + goto unlock_zone; + } zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { @@ -429,7 +441,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, zone->cond = BLK_ZONE_COND_FULL; } - ret = BLK_STS_OK; + ret = badblocks_ret; unlock_zone: null_unlock_zone(dev, zone); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 82467ecde7ec..15627417f12e 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1010,7 +1010,7 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, * See queue limits. */ if ((req_op(rq) != REQ_OP_DISCARD) && (req_op(rq) != REQ_OP_WRITE_ZEROES)) - sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl); + sg_cnt = blk_rq_map_sg(rq, iu->sgt.sgl); if (sg_cnt == 0) sg_mark_end(&iu->sgt.sgl[0]); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 282f81616a78..2b33fb5b949b 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -485,7 +485,7 @@ static int __send_request(struct request *req) } sg_init_table(sg, port->ring_cookies); - nsg = blk_rq_map_sg(req->q, req, sg); + nsg = blk_rq_map_sg(req, sg); len = 0; for (i = 0; i < nsg; i++) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6d7b26ab434f..c060da409ed8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -73,11 +73,10 @@ /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL \ (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \ - UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED) + UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \ + UBLK_PARAM_TYPE_DMA_ALIGN) struct ublk_rq_data { - struct llist_node node; - struct kref ref; }; @@ -144,8 +143,6 @@ struct ublk_queue { struct task_struct *ubq_daemon; char *io_cmd_buf; - struct llist_head io_cmds; - unsigned long io_addr; /* mapped vm address */ unsigned int max_io_sz; bool force_abort; @@ -494,15 +491,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */ static DEFINE_MUTEX(ublk_ctl_mutex); + +#define UBLK_MAX_UBLKS UBLK_MINORS + /* - * Max ublk devices allowed to add + * Max unprivileged ublk devices allowed to add * * It can be extended to one per-user limit in future or even controlled * by cgroup. 
*/ -#define UBLK_MAX_UBLKS UBLK_MINORS -static unsigned int ublks_max = 64; -static unsigned int ublks_added; /* protected by ublk_ctl_mutex */ +static unsigned int unprivileged_ublks_max = 64; +static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */ static struct miscdevice ublk_misc; @@ -573,6 +572,16 @@ static int ublk_validate_params(const struct ublk_device *ub) else if (ublk_dev_is_zoned(ub)) return -EINVAL; + if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) { + const struct ublk_param_dma_align *p = &ub->params.dma; + + if (p->alignment >= PAGE_SIZE) + return -EINVAL; + + if (!is_power_of_2(p->alignment + 1)) + return -EINVAL; + } + return 0; } @@ -1100,7 +1109,7 @@ static void ublk_complete_rq(struct kref *ref) } /* - * Since __ublk_rq_task_work always fails requests immediately during + * Since ublk_rq_task_work_cb always fails requests immediately during * exiting, __ublk_fail_req() is only called from abort context during * exiting. So lock is unnecessary. * @@ -1146,11 +1155,14 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, blk_mq_end_request(rq, BLK_STS_IOERR); } -static inline void __ublk_rq_task_work(struct request *req, - unsigned issue_flags) +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) { - struct ublk_queue *ubq = req->mq_hctx->driver_data; - int tag = req->tag; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_queue *ubq = pdu->ubq; + int tag = pdu->tag; + struct request *req = blk_mq_tag_to_rq( + ubq->dev->tag_set.tags[ubq->q_id], tag); struct ublk_io *io = &ubq->ios[tag]; unsigned int mapped_bytes; @@ -1225,34 +1237,11 @@ static inline void __ublk_rq_task_work(struct request *req, ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq, - unsigned issue_flags) -{ - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data, *tmp; - - io_cmds = llist_reverse_order(io_cmds); - llist_for_each_entry_safe(data, tmp, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags); -} - -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) -{ - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - struct ublk_queue *ubq = pdu->ubq; - - ublk_forward_io_cmds(ubq, issue_flags); -} - static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + struct ublk_io *io = &ubq->ios[rq->tag]; - if (llist_add(&data->node, &ubq->io_cmds)) { - struct ublk_io *io = &ubq->ios[rq->tag]; - - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); - } + io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); } static enum blk_eh_timer_return ublk_timeout(struct request *rq) @@ -1445,7 +1434,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) struct request *rq; /* - * Either we fail the request or ublk_rq_task_work_fn + * Either we fail the request or ublk_rq_task_work_cb * will do it */ rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); @@ -1911,10 +1900,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, return -EIOCBQUEUED; out: - io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); - return -EIOCBQUEUED; + return ret; } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, @@ -1970,7 +1958,10 @@ static 
inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd, unsigned int issue_flags) { - ublk_ch_uring_cmd_local(cmd, issue_flags); + int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); + + if (ret != -EIOCBQUEUED) + io_uring_cmd_done(cmd, ret, 0, issue_flags); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -2235,7 +2226,8 @@ static int ublk_add_chdev(struct ublk_device *ub) if (ret) goto fail; - ublks_added++; + if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) + unprivileged_ublks_added++; return 0; fail: put_device(dev); @@ -2264,11 +2256,16 @@ static int ublk_add_tag_set(struct ublk_device *ub) static void ublk_remove(struct ublk_device *ub) { + bool unprivileged; + ublk_stop_dev(ub); cancel_work_sync(&ub->nosrv_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); + unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; ublk_put_device(ub); - ublks_added--; + + if (unprivileged) + unprivileged_ublks_added--; } static struct ublk_device *ublk_get_device_from_id(int idx) @@ -2343,6 +2340,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL) lim.features |= BLK_FEAT_ROTATIONAL; + if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) + lim.dma_alignment = ub->params.dma.alignment; + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; @@ -2530,7 +2530,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) return ret; ret = -EACCES; - if (ublks_added >= ublks_max) + if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) && + unprivileged_ublks_added >= unprivileged_ublks_max) goto out_unlock; ret = -ENOMEM; @@ -3101,10 +3102,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (ub) ublk_put_device(ub); out: - io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); - return -EIOCBQUEUED; + return ret; } static const struct file_operations ublk_ctl_fops = { @@ -3168,23 +3168,26 @@ static void __exit ublk_exit(void) module_init(ublk_init); module_exit(ublk_exit); -static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp) +static int ublk_set_max_unprivileged_ublks(const char *buf, + const struct kernel_param *kp) { return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS); } -static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp) +static int ublk_get_max_unprivileged_ublks(char *buf, + const struct kernel_param *kp) { - return sysfs_emit(buf, "%u\n", ublks_max); + return sysfs_emit(buf, "%u\n", unprivileged_ublks_max); } -static const struct kernel_param_ops ublk_max_ublks_ops = { - .set = ublk_set_max_ublks, - .get = ublk_get_max_ublks, +static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = { + .set = ublk_set_max_unprivileged_ublks, + .get = ublk_get_max_unprivileged_ublks, }; -module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644); -MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)"); +module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops, + &unprivileged_ublks_max, 0644); +MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add(default: 64)"); MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>"); MODULE_DESCRIPTION("Userspace block device"); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 91cde76a4b3e..7cffea01d868 
100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -226,7 +226,7 @@ static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, if (unlikely(err)) return -ENOMEM; - return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); + return blk_rq_map_sg(req, vbr->sg_table.sgl); } static void virtblk_cleanup_cmd(struct request *req) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index edcd08a9dcef..5babe575c288 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -751,7 +751,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri id = blkif_ring_get_request(rinfo, req, &final_ring_req); ring_req = &rinfo->shadow[id].req; - num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); + num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg); num_grant = 0; /* Calculate the number of grant used */ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) |
