Skip to content

Commit ca4b2a0

Browse files
MatiasBjorling authored and axboe committed
null_blk: add zone support
Adds support for exposing a null_blk device through the zone device interface. The interface is managed with the parameters zoned and zone_size. If zoned is set, the null_blk instance registers as a zoned block device. The zone_size parameter defines how big each zone will be. Signed-off-by: Matias Bjørling <matias.bjorling@wdc.com> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 6dad38d commit ca4b2a0

File tree

5 files changed

+234
-3
lines changed

5 files changed

+234
-3
lines changed

Documentation/block/null_blk.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,10 @@ shared_tags=[0/1]: Default: 0
8585
0: Tag set is not shared.
8686
1: Tag set shared between devices for blk-mq. Only makes sense with
8787
nr_devices > 1, otherwise there's no tag set to share.
88+
89+
zoned=[0/1]: Default: 0
90+
0: Block device is exposed as a random-access block device.
91+
1: Block device is exposed as a host-managed zoned block device.
92+
93+
zone_size=[MB]: Default: 256
94+
Size of each zone, in MB, when the device is exposed as a zoned block device. Must be a power of two.

drivers/block/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,11 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
3636
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
3737

3838
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
39-
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
4039
obj-$(CONFIG_ZRAM) += zram/
4140

41+
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk_mod.o
42+
null_blk_mod-objs := null_blk.o
43+
null_blk_mod-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
44+
4245
skd-y := skd_main.o
4346
swim_mod-y := swim.o swim_asm.o

drivers/block/null_blk.c

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,14 @@ static bool g_use_per_node_hctx;
180180
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
181181
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
182182

183+
static bool g_zoned;
184+
module_param_named(zoned, g_zoned, bool, S_IRUGO);
185+
MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
186+
187+
static unsigned long g_zone_size = 256;
188+
module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
189+
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
190+
183191
static struct nullb_device *null_alloc_dev(void);
184192
static void null_free_dev(struct nullb_device *dev);
185193
static void null_del_dev(struct nullb *nullb);
@@ -283,6 +291,8 @@ NULLB_DEVICE_ATTR(memory_backed, bool);
283291
NULLB_DEVICE_ATTR(discard, bool);
284292
NULLB_DEVICE_ATTR(mbps, uint);
285293
NULLB_DEVICE_ATTR(cache_size, ulong);
294+
NULLB_DEVICE_ATTR(zoned, bool);
295+
NULLB_DEVICE_ATTR(zone_size, ulong);
286296

287297
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
288298
{
@@ -395,6 +405,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
395405
&nullb_device_attr_mbps,
396406
&nullb_device_attr_cache_size,
397407
&nullb_device_attr_badblocks,
408+
&nullb_device_attr_zoned,
409+
&nullb_device_attr_zone_size,
398410
NULL,
399411
};
400412

@@ -447,7 +459,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
447459

448460
static ssize_t memb_group_features_show(struct config_item *item, char *page)
449461
{
450-
return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n");
462+
return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n");
451463
}
452464

453465
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -506,6 +518,8 @@ static struct nullb_device *null_alloc_dev(void)
506518
dev->hw_queue_depth = g_hw_queue_depth;
507519
dev->blocking = g_blocking;
508520
dev->use_per_node_hctx = g_use_per_node_hctx;
521+
dev->zoned = g_zoned;
522+
dev->zone_size = g_zone_size;
509523
return dev;
510524
}
511525

@@ -514,6 +528,7 @@ static void null_free_dev(struct nullb_device *dev)
514528
if (!dev)
515529
return;
516530

531+
null_zone_exit(dev);
517532
badblocks_exit(&dev->badblocks);
518533
kfree(dev);
519534
}
@@ -1146,6 +1161,11 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
11461161
struct nullb *nullb = dev->nullb;
11471162
int err = 0;
11481163

1164+
if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
1165+
cmd->error = null_zone_report(nullb, cmd);
1166+
goto out;
1167+
}
1168+
11491169
if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
11501170
struct request *rq = cmd->rq;
11511171

@@ -1210,6 +1230,13 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
12101230
}
12111231
}
12121232
cmd->error = errno_to_blk_status(err);
1233+
1234+
if (!cmd->error && dev->zoned) {
1235+
if (req_op(cmd->rq) == REQ_OP_WRITE)
1236+
null_zone_write(cmd);
1237+
else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET)
1238+
null_zone_reset(cmd);
1239+
}
12131240
out:
12141241
/* Complete IO by inline, softirq or timer */
12151242
switch (dev->irqmode) {
@@ -1737,6 +1764,15 @@ static int null_add_dev(struct nullb_device *dev)
17371764
blk_queue_flush_queueable(nullb->q, true);
17381765
}
17391766

1767+
if (dev->zoned) {
1768+
rv = null_zone_init(dev);
1769+
if (rv)
1770+
goto out_cleanup_blk_queue;
1771+
1772+
blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
1773+
nullb->q->limits.zoned = BLK_ZONED_HM;
1774+
}
1775+
17401776
nullb->q->queuedata = nullb;
17411777
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
17421778
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
@@ -1755,13 +1791,16 @@ static int null_add_dev(struct nullb_device *dev)
17551791

17561792
rv = null_gendisk_register(nullb);
17571793
if (rv)
1758-
goto out_cleanup_blk_queue;
1794+
goto out_cleanup_zone;
17591795

17601796
mutex_lock(&lock);
17611797
list_add_tail(&nullb->list, &nullb_list);
17621798
mutex_unlock(&lock);
17631799

17641800
return 0;
1801+
out_cleanup_zone:
1802+
if (dev->zoned)
1803+
null_zone_exit(dev);
17651804
out_cleanup_blk_queue:
17661805
blk_cleanup_queue(nullb->q);
17671806
out_cleanup_tags:
@@ -1788,6 +1827,11 @@ static int __init null_init(void)
17881827
g_bs = PAGE_SIZE;
17891828
}
17901829

1830+
if (!is_power_of_2(g_zone_size)) {
1831+
pr_err("null_blk: zone_size must be power-of-two\n");
1832+
return -EINVAL;
1833+
}
1834+
17911835
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
17921836
if (g_submit_queues != nr_online_nodes) {
17931837
pr_warn("null_blk: submit_queues param is set to %u.\n",

drivers/block/null_blk.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ struct nullb_device {
4141
unsigned int curr_cache;
4242
struct badblocks badblocks;
4343

44+
unsigned int nr_zones;
45+
struct blk_zone *zones;
46+
sector_t zone_size_sects;
47+
4448
unsigned long size; /* device size in MB */
4549
unsigned long completion_nsec; /* time in ns to complete a request */
4650
unsigned long cache_size; /* disk cache size in MB */
51+
unsigned long zone_size; /* zone size in MB if device is zoned */
4752
unsigned int submit_queues; /* number of submission queues */
4853
unsigned int home_node; /* home node for the device */
4954
unsigned int queue_mode; /* block interface */
@@ -57,6 +62,7 @@ struct nullb_device {
5762
bool power; /* power on/off the device */
5863
bool memory_backed; /* if data is stored in memory */
5964
bool discard; /* if support discard */
65+
bool zoned; /* if device is zoned */
6066
};
6167

6268
struct nullb {
@@ -77,4 +83,26 @@ struct nullb {
7783
unsigned int nr_queues;
7884
char disk_name[DISK_NAME_LEN];
7985
};
86+
87+
#ifdef CONFIG_BLK_DEV_ZONED
88+
int null_zone_init(struct nullb_device *dev);
89+
void null_zone_exit(struct nullb_device *dev);
90+
blk_status_t null_zone_report(struct nullb *nullb,
91+
struct nullb_cmd *cmd);
92+
void null_zone_write(struct nullb_cmd *cmd);
93+
void null_zone_reset(struct nullb_cmd *cmd);
94+
#else
95+
static inline int null_zone_init(struct nullb_device *dev)
96+
{
97+
return -EINVAL;
98+
}
99+
static inline void null_zone_exit(struct nullb_device *dev) {}
100+
static inline blk_status_t null_zone_report(struct nullb *nullb,
101+
struct nullb_cmd *cmd)
102+
{
103+
return BLK_STS_NOTSUPP;
104+
}
105+
static inline void null_zone_write(struct nullb_cmd *cmd) {}
106+
static inline void null_zone_reset(struct nullb_cmd *cmd) {}
107+
#endif /* CONFIG_BLK_DEV_ZONED */
80108
#endif /* __NULL_BLK_H */

drivers/block/null_blk_zoned.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <linux/vmalloc.h>
3+
#include "null_blk.h"
4+
5+
/* zone_size in MBs to sectors. */
6+
#define ZONE_SIZE_SHIFT 11
7+
8+
/*
 * Translate a sector number into the index of the zone that contains it.
 * The zone size in sectors is used as a shift amount via ilog2(), so it is
 * expected to be a power of two.
 */
static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
{
	unsigned int zone_shift = ilog2(dev->zone_size_sects);

	return sect >> zone_shift;
}
12+
13+
int null_zone_init(struct nullb_device *dev)
14+
{
15+
sector_t dev_size = (sector_t)dev->size * 1024 * 1024;
16+
sector_t sector = 0;
17+
unsigned int i;
18+
19+
if (!is_power_of_2(dev->zone_size)) {
20+
pr_err("null_blk: zone_size must be power-of-two\n");
21+
return -EINVAL;
22+
}
23+
24+
dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
25+
dev->nr_zones = dev_size >>
26+
(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
27+
dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
28+
GFP_KERNEL | __GFP_ZERO);
29+
if (!dev->zones)
30+
return -ENOMEM;
31+
32+
for (i = 0; i < dev->nr_zones; i++) {
33+
struct blk_zone *zone = &dev->zones[i];
34+
35+
zone->start = zone->wp = sector;
36+
zone->len = dev->zone_size_sects;
37+
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
38+
zone->cond = BLK_ZONE_COND_EMPTY;
39+
40+
sector += dev->zone_size_sects;
41+
}
42+
43+
return 0;
44+
}
45+
46+
void null_zone_exit(struct nullb_device *dev)
47+
{
48+
kvfree(dev->zones);
49+
}
50+
51+
static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq,
52+
unsigned int zno, unsigned int nr_zones)
53+
{
54+
struct blk_zone_report_hdr *hdr = NULL;
55+
struct bio_vec bvec;
56+
struct bvec_iter iter;
57+
void *addr;
58+
unsigned int zones_to_cpy;
59+
60+
bio_for_each_segment(bvec, rq->bio, iter) {
61+
addr = kmap_atomic(bvec.bv_page);
62+
63+
zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone);
64+
65+
if (!hdr) {
66+
hdr = (struct blk_zone_report_hdr *)addr;
67+
hdr->nr_zones = nr_zones;
68+
zones_to_cpy--;
69+
addr += sizeof(struct blk_zone_report_hdr);
70+
}
71+
72+
zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones);
73+
74+
memcpy(addr, &dev->zones[zno],
75+
zones_to_cpy * sizeof(struct blk_zone));
76+
77+
kunmap_atomic(addr);
78+
79+
nr_zones -= zones_to_cpy;
80+
zno += zones_to_cpy;
81+
82+
if (!nr_zones)
83+
break;
84+
}
85+
}
86+
87+
blk_status_t null_zone_report(struct nullb *nullb,
88+
struct nullb_cmd *cmd)
89+
{
90+
struct nullb_device *dev = nullb->dev;
91+
struct request *rq = cmd->rq;
92+
unsigned int zno = null_zone_no(dev, blk_rq_pos(rq));
93+
unsigned int nr_zones = dev->nr_zones - zno;
94+
unsigned int max_zones = (blk_rq_bytes(rq) /
95+
sizeof(struct blk_zone)) - 1;
96+
97+
nr_zones = min_t(unsigned int, nr_zones, max_zones);
98+
99+
null_zone_fill_rq(nullb->dev, rq, zno, nr_zones);
100+
101+
return BLK_STS_OK;
102+
}
103+
104+
void null_zone_write(struct nullb_cmd *cmd)
105+
{
106+
struct nullb_device *dev = cmd->nq->dev;
107+
struct request *rq = cmd->rq;
108+
sector_t sector = blk_rq_pos(rq);
109+
unsigned int rq_sectors = blk_rq_sectors(rq);
110+
unsigned int zno = null_zone_no(dev, sector);
111+
struct blk_zone *zone = &dev->zones[zno];
112+
113+
switch (zone->cond) {
114+
case BLK_ZONE_COND_FULL:
115+
/* Cannot write to a full zone */
116+
cmd->error = BLK_STS_IOERR;
117+
break;
118+
case BLK_ZONE_COND_EMPTY:
119+
case BLK_ZONE_COND_IMP_OPEN:
120+
/* Writes must be at the write pointer position */
121+
if (blk_rq_pos(rq) != zone->wp) {
122+
cmd->error = BLK_STS_IOERR;
123+
break;
124+
}
125+
126+
if (zone->cond == BLK_ZONE_COND_EMPTY)
127+
zone->cond = BLK_ZONE_COND_IMP_OPEN;
128+
129+
zone->wp += rq_sectors;
130+
if (zone->wp == zone->start + zone->len)
131+
zone->cond = BLK_ZONE_COND_FULL;
132+
break;
133+
default:
134+
/* Invalid zone condition */
135+
cmd->error = BLK_STS_IOERR;
136+
break;
137+
}
138+
}
139+
140+
void null_zone_reset(struct nullb_cmd *cmd)
141+
{
142+
struct nullb_device *dev = cmd->nq->dev;
143+
struct request *rq = cmd->rq;
144+
unsigned int zno = null_zone_no(dev, blk_rq_pos(rq));
145+
struct blk_zone *zone = &dev->zones[zno];
146+
147+
zone->cond = BLK_ZONE_COND_EMPTY;
148+
zone->wp = zone->start;
149+
}

0 commit comments

Comments
 (0)