Skip to content

Commit 11efae3

Browse files
committed
Merge tag 'for-5.1/block-post-20190315' of git://git.kernel.dk/linux-block
Pull more block layer changes from Jens Axboe: "This is a collection of both stragglers, and fixes that came in after I finalized the initial pull. This contains: - An MD pull request from Song, with a few minor fixes - Set of NVMe patches via Christoph - Pull request from Konrad, with a few fixes for xen/blkback - pblk fix IO calculation fix (Javier) - Segment calculation fix for pass-through (Ming) - Fallthrough annotation for blkcg (Mathieu)" * tag 'for-5.1/block-post-20190315' of git://git.kernel.dk/linux-block: (25 commits) blkcg: annotate implicit fall through nvme-tcp: support C2HData with SUCCESS flag nvmet: ignore EOPNOTSUPP for discard nvme: add proper write zeroes setup for the multipath device nvme: add proper discard setup for the multipath device nvme: remove nvme_ns_config_oncs nvme: disable Write Zeroes for qemu controllers nvmet-fc: bring Disconnect into compliance with FC-NVME spec nvmet-fc: fix issues with targetport assoc_list list walking nvme-fc: reject reconnect if io queue count is reduced to zero nvme-fc: fix numa_node when dev is null nvme-fc: use nr_phys_segments to determine existence of sgl nvme-loop: init nvmet_ctrl fatal_err_work when allocate nvme: update comment to make the code easier to read nvme: put ns_head ref if namespace fails allocation nvme-trace: fix cdw10 buffer overrun nvme: don't warn on block content change effects nvme: add get-feature to admin cmds tracer md: Fix failed allocation of md_register_thread It's wrong to add len to sector_nr in raid10 reshape twice ...
2 parents 465c209 + f6d85f0 commit 11efae3

File tree

20 files changed

+259
-128
lines changed

20 files changed

+259
-128
lines changed

Documentation/admin-guide/md.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,3 +756,6 @@ These currently include:
756756
The cache mode for raid5. raid5 could include an extra disk for
757757
caching. The mode can be "write-through" and "write-back". The
758758
default is "write-through".
759+
760+
ppl_write_hint
761+
NVMe stream ID to be set for each PPL write request.

block/blk-merge.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static unsigned get_max_segment_size(struct request_queue *q,
180180
*/
181181
static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
182182
unsigned *nsegs, unsigned *last_seg_size,
183-
unsigned *front_seg_size, unsigned *sectors)
183+
unsigned *front_seg_size, unsigned *sectors, unsigned max_segs)
184184
{
185185
unsigned len = bv->bv_len;
186186
unsigned total_len = 0;
@@ -190,7 +190,7 @@ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
190190
* Multi-page bvec may be too big to hold in one segment, so the
191191
current bvec has to be split into multiple segments.
192192
*/
193-
while (len && new_nsegs + *nsegs < queue_max_segments(q)) {
193+
while (len && new_nsegs + *nsegs < max_segs) {
194194
seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
195195
seg_size = min(seg_size, len);
196196

@@ -240,6 +240,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
240240
bool do_split = true;
241241
struct bio *new = NULL;
242242
const unsigned max_sectors = get_max_io_size(q, bio);
243+
const unsigned max_segs = queue_max_segments(q);
243244

244245
bio_for_each_bvec(bv, bio, iter) {
245246
/*
@@ -254,14 +255,14 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
254255
* Consider this a new segment if we're splitting in
255256
* the middle of this vector.
256257
*/
257-
if (nsegs < queue_max_segments(q) &&
258+
if (nsegs < max_segs &&
258259
sectors < max_sectors) {
259260
/* split in the middle of bvec */
260261
bv.bv_len = (max_sectors - sectors) << 9;
261262
bvec_split_segs(q, &bv, &nsegs,
262263
&seg_size,
263264
&front_seg_size,
264-
&sectors);
265+
&sectors, max_segs);
265266
}
266267
goto split;
267268
}
@@ -283,7 +284,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
283284
continue;
284285
}
285286
new_segment:
286-
if (nsegs == queue_max_segments(q))
287+
if (nsegs == max_segs)
287288
goto split;
288289

289290
bvprv = bv;
@@ -296,7 +297,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
296297
if (nsegs == 1 && seg_size > front_seg_size)
297298
front_seg_size = seg_size;
298299
} else if (bvec_split_segs(q, &bv, &nsegs, &seg_size,
299-
&front_seg_size, &sectors)) {
300+
&front_seg_size, &sectors, max_segs)) {
300301
goto split;
301302
}
302303
}
@@ -415,7 +416,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
415416
bvprv = bv;
416417
prev = 1;
417418
bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size,
418-
&front_seg_size, NULL);
419+
&front_seg_size, NULL, UINT_MAX);
419420
}
420421
bbio = bio;
421422
}

drivers/block/xen-blkback/xenbus.c

Lines changed: 57 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -926,7 +926,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
926926
int err, i, j;
927927
struct xen_blkif *blkif = ring->blkif;
928928
struct xenbus_device *dev = blkif->be->dev;
929-
unsigned int ring_page_order, nr_grefs, evtchn;
929+
unsigned int nr_grefs, evtchn;
930930

931931
err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
932932
&evtchn);
@@ -936,43 +936,42 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
936936
return err;
937937
}
938938

939-
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
940-
&ring_page_order);
941-
if (err != 1) {
942-
err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
939+
nr_grefs = blkif->nr_ring_pages;
940+
941+
if (unlikely(!nr_grefs)) {
942+
WARN_ON(true);
943+
return -EINVAL;
944+
}
945+
946+
for (i = 0; i < nr_grefs; i++) {
947+
char ring_ref_name[RINGREF_NAME_LEN];
948+
949+
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
950+
err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
951+
"%u", &ring_ref[i]);
952+
943953
if (err != 1) {
954+
if (nr_grefs == 1)
955+
break;
956+
944957
err = -EINVAL;
945-
xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
958+
xenbus_dev_fatal(dev, err, "reading %s/%s",
959+
dir, ring_ref_name);
946960
return err;
947961
}
948-
nr_grefs = 1;
949-
} else {
950-
unsigned int i;
962+
}
951963

952-
if (ring_page_order > xen_blkif_max_ring_order) {
964+
if (err != 1) {
965+
WARN_ON(nr_grefs != 1);
966+
967+
err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u",
968+
&ring_ref[0]);
969+
if (err != 1) {
953970
err = -EINVAL;
954-
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
955-
dir, ring_page_order,
956-
xen_blkif_max_ring_order);
971+
xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
957972
return err;
958973
}
959-
960-
nr_grefs = 1 << ring_page_order;
961-
for (i = 0; i < nr_grefs; i++) {
962-
char ring_ref_name[RINGREF_NAME_LEN];
963-
964-
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
965-
err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
966-
"%u", &ring_ref[i]);
967-
if (err != 1) {
968-
err = -EINVAL;
969-
xenbus_dev_fatal(dev, err, "reading %s/%s",
970-
dir, ring_ref_name);
971-
return err;
972-
}
973-
}
974974
}
975-
blkif->nr_ring_pages = nr_grefs;
976975

977976
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
978977
req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -1023,35 +1022,37 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
10231022
static int connect_ring(struct backend_info *be)
10241023
{
10251024
struct xenbus_device *dev = be->dev;
1025+
struct xen_blkif *blkif = be->blkif;
10261026
unsigned int pers_grants;
10271027
char protocol[64] = "";
10281028
int err, i;
10291029
char *xspath;
10301030
size_t xspathsize;
10311031
const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
10321032
unsigned int requested_num_queues = 0;
1033+
unsigned int ring_page_order;
10331034

10341035
pr_debug("%s %s\n", __func__, dev->otherend);
10351036

1036-
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
1037+
blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
10371038
err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
10381039
"%63s", protocol);
10391040
if (err <= 0)
10401041
strcpy(protocol, "unspecified, assuming default");
10411042
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
1042-
be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
1043+
blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
10431044
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
1044-
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
1045+
blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
10451046
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
1046-
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
1047+
blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
10471048
else {
10481049
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
10491050
return -ENOSYS;
10501051
}
10511052
pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent",
10521053
0);
1053-
be->blkif->vbd.feature_gnt_persistent = pers_grants;
1054-
be->blkif->vbd.overflow_max_grants = 0;
1054+
blkif->vbd.feature_gnt_persistent = pers_grants;
1055+
blkif->vbd.overflow_max_grants = 0;
10551056

10561057
/*
10571058
* Read the number of hardware queues from frontend.
@@ -1067,16 +1068,30 @@ static int connect_ring(struct backend_info *be)
10671068
requested_num_queues, xenblk_max_queues);
10681069
return -ENOSYS;
10691070
}
1070-
be->blkif->nr_rings = requested_num_queues;
1071-
if (xen_blkif_alloc_rings(be->blkif))
1071+
blkif->nr_rings = requested_num_queues;
1072+
if (xen_blkif_alloc_rings(blkif))
10721073
return -ENOMEM;
10731074

10741075
pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
1075-
be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
1076+
blkif->nr_rings, blkif->blk_protocol, protocol,
10761077
pers_grants ? "persistent grants" : "");
10771078

1078-
if (be->blkif->nr_rings == 1)
1079-
return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
1079+
ring_page_order = xenbus_read_unsigned(dev->otherend,
1080+
"ring-page-order", 0);
1081+
1082+
if (ring_page_order > xen_blkif_max_ring_order) {
1083+
err = -EINVAL;
1084+
xenbus_dev_fatal(dev, err,
1085+
"requested ring page order %d exceed max:%d",
1086+
ring_page_order,
1087+
xen_blkif_max_ring_order);
1088+
return err;
1089+
}
1090+
1091+
blkif->nr_ring_pages = 1 << ring_page_order;
1092+
1093+
if (blkif->nr_rings == 1)
1094+
return read_per_ring_refs(&blkif->rings[0], dev->otherend);
10801095
else {
10811096
xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
10821097
xspath = kmalloc(xspathsize, GFP_KERNEL);
@@ -1085,10 +1100,10 @@ static int connect_ring(struct backend_info *be)
10851100
return -ENOMEM;
10861101
}
10871102

1088-
for (i = 0; i < be->blkif->nr_rings; i++) {
1103+
for (i = 0; i < blkif->nr_rings; i++) {
10891104
memset(xspath, 0, xspathsize);
10901105
snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
1091-
err = read_per_ring_refs(&be->blkif->rings[i], xspath);
1106+
err = read_per_ring_refs(&blkif->rings[i], xspath);
10921107
if (err) {
10931108
kfree(xspath);
10941109
return err;

drivers/lightnvm/pblk-rl.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,15 @@ void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
233233
/* To start with, all buffer is available to user I/O writers */
234234
rl->rb_budget = budget;
235235
rl->rb_user_max = budget;
236-
rl->rb_max_io = threshold ? (budget - threshold) : (budget - 1);
237236
rl->rb_gc_max = 0;
238237
rl->rb_state = PBLK_RL_HIGH;
239238

239+
/* Maximize I/O size and ensure that the back threshold is respected */
240+
if (threshold)
241+
rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
242+
else
243+
rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
244+
240245
atomic_set(&rl->rb_user_cnt, 0);
241246
atomic_set(&rl->rb_gc_cnt, 0);
242247
atomic_set(&rl->rb_space, -1);

drivers/md/raid10.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3939,6 +3939,8 @@ static int raid10_run(struct mddev *mddev)
39393939
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
39403940
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
39413941
"reshape");
3942+
if (!mddev->sync_thread)
3943+
goto out_free_conf;
39423944
}
39433945

39443946
return 0;
@@ -4670,7 +4672,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
46704672
atomic_inc(&r10_bio->remaining);
46714673
read_bio->bi_next = NULL;
46724674
generic_make_request(read_bio);
4673-
sector_nr += nr_sectors;
46744675
sectors_done += nr_sectors;
46754676
if (sector_nr <= last)
46764677
goto read_more;

drivers/md/raid5-log.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ extern void ppl_stripe_write_finished(struct stripe_head *sh);
4545
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
4646
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
4747
extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
48+
extern struct md_sysfs_entry ppl_write_hint;
4849

4950
static inline bool raid5_has_log(struct r5conf *conf)
5051
{

drivers/md/raid5-ppl.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/raid/md_p.h>
2121
#include "md.h"
2222
#include "raid5.h"
23+
#include "raid5-log.h"
2324

2425
/*
2526
* PPL consists of a 4KB header (struct ppl_header) and at least 128KB for
@@ -115,6 +116,8 @@ struct ppl_conf {
115116
/* stripes to retry if failed to allocate io_unit */
116117
struct list_head no_mem_stripes;
117118
spinlock_t no_mem_stripes_lock;
119+
120+
unsigned short write_hint;
118121
};
119122

120123
struct ppl_log {
@@ -474,6 +477,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
474477
bio_set_dev(bio, log->rdev->bdev);
475478
bio->bi_iter.bi_sector = log->next_io_sector;
476479
bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
480+
bio->bi_write_hint = ppl_conf->write_hint;
477481

478482
pr_debug("%s: log->current_io_sector: %llu\n", __func__,
479483
(unsigned long long)log->next_io_sector);
@@ -503,6 +507,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
503507
bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
504508
&ppl_conf->bs);
505509
bio->bi_opf = prev->bi_opf;
510+
bio->bi_write_hint = prev->bi_write_hint;
506511
bio_copy_dev(bio, prev);
507512
bio->bi_iter.bi_sector = bio_end_sector(prev);
508513
bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
@@ -1407,6 +1412,7 @@ int ppl_init_log(struct r5conf *conf)
14071412
atomic64_set(&ppl_conf->seq, 0);
14081413
INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
14091414
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
1415+
ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
14101416

14111417
if (!mddev->external) {
14121418
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
@@ -1501,3 +1507,60 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
15011507

15021508
return ret;
15031509
}
1510+
1511+
static ssize_t
1512+
ppl_write_hint_show(struct mddev *mddev, char *buf)
1513+
{
1514+
size_t ret = 0;
1515+
struct r5conf *conf;
1516+
struct ppl_conf *ppl_conf = NULL;
1517+
1518+
spin_lock(&mddev->lock);
1519+
conf = mddev->private;
1520+
if (conf && raid5_has_ppl(conf))
1521+
ppl_conf = conf->log_private;
1522+
ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0);
1523+
spin_unlock(&mddev->lock);
1524+
1525+
return ret;
1526+
}
1527+
1528+
static ssize_t
1529+
ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
1530+
{
1531+
struct r5conf *conf;
1532+
struct ppl_conf *ppl_conf;
1533+
int err = 0;
1534+
unsigned short new;
1535+
1536+
if (len >= PAGE_SIZE)
1537+
return -EINVAL;
1538+
if (kstrtou16(page, 10, &new))
1539+
return -EINVAL;
1540+
1541+
err = mddev_lock(mddev);
1542+
if (err)
1543+
return err;
1544+
1545+
conf = mddev->private;
1546+
if (!conf) {
1547+
err = -ENODEV;
1548+
} else if (raid5_has_ppl(conf)) {
1549+
ppl_conf = conf->log_private;
1550+
if (!ppl_conf)
1551+
err = -EINVAL;
1552+
else
1553+
ppl_conf->write_hint = new;
1554+
} else {
1555+
err = -EINVAL;
1556+
}
1557+
1558+
mddev_unlock(mddev);
1559+
1560+
return err ?: len;
1561+
}
1562+
1563+
struct md_sysfs_entry
1564+
ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
1565+
ppl_write_hint_show,
1566+
ppl_write_hint_store);

0 commit comments

Comments
 (0)