Skip to content

Commit 1daac19

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "A collection of fixes since the merge window; - fix for a double elevator module release, from Chao Yu. Ancient bug. - the splice() MORE flag fix from Christophe Leroy. - a fix for NVMe, fixing a patch that went in in the merge window. From Keith. - two fixes for blk-mq CPU hotplug handling, from Ming Lei. - bdi vs blockdev lifetime fix from Neil Brown, fixing an oops in md. - two blk-mq fixes from Shaohua, fixing a race on queue stop and a bad merge issue with FUA writes. - division-by-zero fix for writeback from Tejun. - a block bounce page accounting fix, making sure we inc/dec after bouncing so that pre/post IO pages match up. From Wang YanQing" * 'for-linus' of git://git.kernel.dk/linux-block: splice: sendfile() at once fails for big files blk-mq: don't lose requests if a stopped queue restarts blk-mq: fix FUA request hang block: destroy bdi before blockdev is unregistered. block:bounce: fix call inc_|dec_zone_page_state on different pages confuse value of NR_BOUNCE elevator: fix double release of elevator module writeback: use |1 instead of +1 to protect against div by zero blk-mq: fix CPU hotplug handling blk-mq: fix race between timeout and CPU hotplug NVMe: Fix VPD B0 max sectors translation
2 parents 41c64bb + 0ff28d9 commit 1daac19

File tree

11 files changed

+60
-41
lines changed

11 files changed

+60
-41
lines changed

block/blk-core.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,8 @@ void blk_cleanup_queue(struct request_queue *q)
552552
q->queue_lock = &q->__queue_lock;
553553
spin_unlock_irq(lock);
554554

555+
bdi_destroy(&q->backing_dev_info);
556+
555557
/* @q is and will stay empty, shutdown and put */
556558
blk_put_queue(q);
557559
}

block/blk-mq.c

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,11 @@ static void blk_mq_rq_timer(unsigned long priv)
677677
data.next = blk_rq_timeout(round_jiffies_up(data.next));
678678
mod_timer(&q->timeout, data.next);
679679
} else {
680-
queue_for_each_hw_ctx(q, hctx, i)
681-
blk_mq_tag_idle(hctx);
680+
queue_for_each_hw_ctx(q, hctx, i) {
681+
/* the hctx may be unmapped, so check it here */
682+
if (blk_mq_hw_queue_mapped(hctx))
683+
blk_mq_tag_idle(hctx);
684+
}
682685
}
683686
}
684687

@@ -855,6 +858,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
855858
spin_lock(&hctx->lock);
856859
list_splice(&rq_list, &hctx->dispatch);
857860
spin_unlock(&hctx->lock);
861+
/*
862+
* the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
863+
* it's possible the queue is stopped and restarted again
864+
* before this. Queue restart will dispatch requests. And since
865+
* requests in rq_list aren't added into hctx->dispatch yet,
866+
* the requests in rq_list might get lost.
867+
*
868+
* blk_mq_run_hw_queue() already checks the STOPPED bit
869+
**/
870+
blk_mq_run_hw_queue(hctx, true);
858871
}
859872
}
860873

@@ -1571,31 +1584,18 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
15711584
return NOTIFY_OK;
15721585
}
15731586

1574-
static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
1575-
{
1576-
struct request_queue *q = hctx->queue;
1577-
struct blk_mq_tag_set *set = q->tag_set;
1578-
1579-
if (set->tags[hctx->queue_num])
1580-
return NOTIFY_OK;
1581-
1582-
set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
1583-
if (!set->tags[hctx->queue_num])
1584-
return NOTIFY_STOP;
1585-
1586-
hctx->tags = set->tags[hctx->queue_num];
1587-
return NOTIFY_OK;
1588-
}
1589-
15901587
static int blk_mq_hctx_notify(void *data, unsigned long action,
15911588
unsigned int cpu)
15921589
{
15931590
struct blk_mq_hw_ctx *hctx = data;
15941591

15951592
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
15961593
return blk_mq_hctx_cpu_offline(hctx, cpu);
1597-
else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
1598-
return blk_mq_hctx_cpu_online(hctx, cpu);
1594+
1595+
/*
1596+
* In case of CPU online, tags may be reallocated
1597+
* in blk_mq_map_swqueue() after mapping is updated.
1598+
*/
15991599

16001600
return NOTIFY_OK;
16011601
}
@@ -1775,6 +1775,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
17751775
unsigned int i;
17761776
struct blk_mq_hw_ctx *hctx;
17771777
struct blk_mq_ctx *ctx;
1778+
struct blk_mq_tag_set *set = q->tag_set;
17781779

17791780
queue_for_each_hw_ctx(q, hctx, i) {
17801781
cpumask_clear(hctx->cpumask);
@@ -1803,16 +1804,20 @@ static void blk_mq_map_swqueue(struct request_queue *q)
18031804
* disable it and free the request entries.
18041805
*/
18051806
if (!hctx->nr_ctx) {
1806-
struct blk_mq_tag_set *set = q->tag_set;
1807-
18081807
if (set->tags[i]) {
18091808
blk_mq_free_rq_map(set, set->tags[i], i);
18101809
set->tags[i] = NULL;
1811-
hctx->tags = NULL;
18121810
}
1811+
hctx->tags = NULL;
18131812
continue;
18141813
}
18151814

1815+
/* unmapped hw queue can be remapped after CPU topo changed */
1816+
if (!set->tags[i])
1817+
set->tags[i] = blk_mq_init_rq_map(set, i);
1818+
hctx->tags = set->tags[i];
1819+
WARN_ON(!hctx->tags);
1820+
18161821
/*
18171822
* Set the map size to the number of mapped software queues.
18181823
* This is more accurate and more efficient than looping
@@ -2090,9 +2095,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
20902095
*/
20912096
list_for_each_entry(q, &all_q_list, all_q_node)
20922097
blk_mq_freeze_queue_start(q);
2093-
list_for_each_entry(q, &all_q_list, all_q_node)
2098+
list_for_each_entry(q, &all_q_list, all_q_node) {
20942099
blk_mq_freeze_queue_wait(q);
20952100

2101+
/*
2102+
* timeout handler can't touch hw queue during the
2103+
* reinitialization
2104+
*/
2105+
del_timer_sync(&q->timeout);
2106+
}
2107+
20962108
list_for_each_entry(q, &all_q_list, all_q_node)
20972109
blk_mq_queue_reinit(q);
20982110

block/blk-sysfs.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -522,8 +522,6 @@ static void blk_release_queue(struct kobject *kobj)
522522

523523
blk_trace_shutdown(q);
524524

525-
bdi_destroy(&q->backing_dev_info);
526-
527525
ida_simple_remove(&blk_queue_ida, q->id);
528526
call_rcu(&q->rcu_head, blk_free_queue_rcu);
529527
}

block/bounce.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
221221
if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
222222
continue;
223223

224-
inc_zone_page_state(to->bv_page, NR_BOUNCE);
225224
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
225+
inc_zone_page_state(to->bv_page, NR_BOUNCE);
226226

227227
if (rw == WRITE) {
228228
char *vto, *vfrom;

block/elevator.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,18 +157,14 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
157157

158158
eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
159159
if (unlikely(!eq))
160-
goto err;
160+
return NULL;
161161

162162
eq->type = e;
163163
kobject_init(&eq->kobj, &elv_ktype);
164164
mutex_init(&eq->sysfs_lock);
165165
hash_init(eq->hash);
166166

167167
return eq;
168-
err:
169-
kfree(eq);
170-
elevator_put(e);
171-
return NULL;
172168
}
173169
EXPORT_SYMBOL(elevator_alloc);
174170

drivers/block/loop.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1620,8 +1620,8 @@ static int loop_add(struct loop_device **l, int i)
16201620

16211621
static void loop_remove(struct loop_device *lo)
16221622
{
1623-
del_gendisk(lo->lo_disk);
16241623
blk_cleanup_queue(lo->lo_queue);
1624+
del_gendisk(lo->lo_disk);
16251625
blk_mq_free_tag_set(&lo->tag_set);
16261626
put_disk(lo->lo_disk);
16271627
kfree(lo);

drivers/block/nvme-scsi.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
944944
static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
945945
u8 *inq_response, int alloc_len)
946946
{
947-
__be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
947+
__be32 max_sectors = cpu_to_be32(
948+
nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
948949
__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
949950
__be32 discard_desc_count = cpu_to_be32(0x100);
950951

drivers/md/md.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4818,12 +4818,12 @@ static void md_free(struct kobject *ko)
48184818
if (mddev->sysfs_state)
48194819
sysfs_put(mddev->sysfs_state);
48204820

4821+
if (mddev->queue)
4822+
blk_cleanup_queue(mddev->queue);
48214823
if (mddev->gendisk) {
48224824
del_gendisk(mddev->gendisk);
48234825
put_disk(mddev->gendisk);
48244826
}
4825-
if (mddev->queue)
4826-
blk_cleanup_queue(mddev->queue);
48274827

48284828
kfree(mddev);
48294829
}

fs/splice.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
11611161
long ret, bytes;
11621162
umode_t i_mode;
11631163
size_t len;
1164-
int i, flags;
1164+
int i, flags, more;
11651165

11661166
/*
11671167
* We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
12041204
* Don't block on output, we have to drain the direct pipe.
12051205
*/
12061206
sd->flags &= ~SPLICE_F_NONBLOCK;
1207+
more = sd->flags & SPLICE_F_MORE;
12071208

12081209
while (len) {
12091210
size_t read_len;
@@ -1216,6 +1217,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
12161217
read_len = ret;
12171218
sd->total_len = read_len;
12181219

1220+
/*
1221+
* If more data is pending, set SPLICE_F_MORE
1222+
* If this is the last data and SPLICE_F_MORE was not set
1223+
* initially, clears it.
1224+
*/
1225+
if (read_len < len)
1226+
sd->flags |= SPLICE_F_MORE;
1227+
else if (!more)
1228+
sd->flags &= ~SPLICE_F_MORE;
12191229
/*
12201230
* NOTE: nonblocking mode only applies to the input. We
12211231
* must not do the output in nonblocking mode as then we

include/linux/blk_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ enum rq_flag_bits {
220220

221221
/* This mask is used for both bio and request merge checking */
222222
#define REQ_NOMERGE_FLAGS \
223-
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
223+
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)
224224

225225
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
226226
#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)

mm/page-writeback.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
580580
long x;
581581

582582
x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
583-
limit - setpoint + 1);
583+
(limit - setpoint) | 1);
584584
pos_ratio = x;
585585
pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
586586
pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -807,7 +807,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
807807
* scale global setpoint to bdi's:
808808
* bdi_setpoint = setpoint * bdi_thresh / thresh
809809
*/
810-
x = div_u64((u64)bdi_thresh << 16, thresh + 1);
810+
x = div_u64((u64)bdi_thresh << 16, thresh | 1);
811811
bdi_setpoint = setpoint * (u64)x >> 16;
812812
/*
813813
* Use span=(8*write_bw) in single bdi case as indicated by
@@ -822,7 +822,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
822822

823823
if (bdi_dirty < x_intercept - span / 4) {
824824
pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
825-
x_intercept - bdi_setpoint + 1);
825+
(x_intercept - bdi_setpoint) | 1);
826826
} else
827827
pos_ratio /= 4;
828828

0 commit comments

Comments
 (0)