Skip to content

Commit 2110cf0

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "I've got a few bits pending for 3.8 final, that I better get sent out. It's all been sitting for a while, I consider it safe. It contains: - Two bug fixes for mtip32xx, fixing a driver hang and a crash. - A few-liner protocol error fix for drbd. - A few fixes for the xen block front/back driver, fixing a potential data corruption issue. - A race fix for disk_clear_events(), causing spurious warnings. Out of the Chrome OS base. - A deadlock fix for disk_clear_events(), moving it to an unfreezable workqueue. Also from the Chrome OS base." * 'for-linus' of git://git.kernel.dk/linux-block: drbd: fix potential protocol error and resulting disconnect/reconnect mtip32xx: fix for crash when the device surprise removed during rebuild mtip32xx: fix for driver hang after a command timeout block: prevent race/cleanup block: remove deadlock in disk_clear_events xen-blkfront: handle bvecs with partial data llist/xen-blkfront: implement safe version of llist_for_each_entry xen-blkback: implement safe iterator for the list of persistent grants
2 parents 1589a3e + 1383923 commit 2110cf0

File tree

8 files changed

+101
-28
lines changed

8 files changed

+101
-28
lines changed

block/genhd.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static DEFINE_IDR(ext_devt_idr);
3535

3636
static struct device_type disk_type;
3737

38+
static void disk_check_events(struct disk_events *ev,
39+
unsigned int *clearing_ptr);
3840
static void disk_alloc_events(struct gendisk *disk);
3941
static void disk_add_events(struct gendisk *disk);
4042
static void disk_del_events(struct gendisk *disk);
@@ -1549,6 +1551,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
15491551
const struct block_device_operations *bdops = disk->fops;
15501552
struct disk_events *ev = disk->ev;
15511553
unsigned int pending;
1554+
unsigned int clearing = mask;
15521555

15531556
if (!ev) {
15541557
/* for drivers still using the old ->media_changed method */
@@ -1558,34 +1561,53 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
15581561
return 0;
15591562
}
15601563

1561-
/* tell the workfn about the events being cleared */
1564+
disk_block_events(disk);
1565+
1566+
/*
1567+
* store the union of mask and ev->clearing on the stack so that the
1568+
* race with disk_flush_events does not cause ambiguity (ev->clearing
1569+
* can still be modified even if events are blocked).
1570+
*/
15621571
spin_lock_irq(&ev->lock);
1563-
ev->clearing |= mask;
1572+
clearing |= ev->clearing;
1573+
ev->clearing = 0;
15641574
spin_unlock_irq(&ev->lock);
15651575

1566-
/* unconditionally schedule event check and wait for it to finish */
1567-
disk_block_events(disk);
1568-
queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
1569-
flush_delayed_work(&ev->dwork);
1570-
__disk_unblock_events(disk, false);
1576+
disk_check_events(ev, &clearing);
1577+
/*
1578+
* if ev->clearing is not 0, the disk_flush_events got called in the
1579+
* middle of this function, so we want to run the workfn without delay.
1580+
*/
1581+
__disk_unblock_events(disk, ev->clearing ? true : false);
15711582

15721583
/* then, fetch and clear pending events */
15731584
spin_lock_irq(&ev->lock);
1574-
WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
15751585
pending = ev->pending & mask;
15761586
ev->pending &= ~mask;
15771587
spin_unlock_irq(&ev->lock);
1588+
WARN_ON_ONCE(clearing & mask);
15781589

15791590
return pending;
15801591
}
15811592

1593+
/*
1594+
* Separate this part out so that a different pointer for clearing_ptr can be
1595+
* passed in for disk_clear_events.
1596+
*/
15821597
static void disk_events_workfn(struct work_struct *work)
15831598
{
15841599
struct delayed_work *dwork = to_delayed_work(work);
15851600
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1601+
1602+
disk_check_events(ev, &ev->clearing);
1603+
}
1604+
1605+
static void disk_check_events(struct disk_events *ev,
1606+
unsigned int *clearing_ptr)
1607+
{
15861608
struct gendisk *disk = ev->disk;
15871609
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1588-
unsigned int clearing = ev->clearing;
1610+
unsigned int clearing = *clearing_ptr;
15891611
unsigned int events;
15901612
unsigned long intv;
15911613
int nr_events = 0, i;
@@ -1598,7 +1620,7 @@ static void disk_events_workfn(struct work_struct *work)
15981620

15991621
events &= ~ev->pending;
16001622
ev->pending |= events;
1601-
ev->clearing &= ~clearing;
1623+
*clearing_ptr &= ~clearing;
16021624

16031625
intv = disk_events_poll_jiffies(disk);
16041626
if (!ev->block && intv)

drivers/block/drbd/drbd_req.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ static void wake_all_senders(struct drbd_tconn *tconn) {
168168
}
169169

170170
/* must hold resource->req_lock */
171-
static void start_new_tl_epoch(struct drbd_tconn *tconn)
171+
void start_new_tl_epoch(struct drbd_tconn *tconn)
172172
{
173173
/* no point closing an epoch, if it is empty, anyways. */
174174
if (tconn->current_tle_writes == 0)

drivers/block/drbd/drbd_req.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ struct bio_and_error {
267267
int error;
268268
};
269269

270+
extern void start_new_tl_epoch(struct drbd_tconn *tconn);
270271
extern void drbd_req_destroy(struct kref *kref);
271272
extern void _req_may_be_done(struct drbd_request *req,
272273
struct bio_and_error *m);

drivers/block/drbd/drbd_state.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
931931
enum drbd_state_rv rv = SS_SUCCESS;
932932
enum sanitize_state_warnings ssw;
933933
struct after_state_chg_work *ascw;
934+
bool did_remote, should_do_remote;
934935

935936
os = drbd_read_state(mdev);
936937

@@ -981,11 +982,17 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
981982
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
982983
atomic_inc(&mdev->local_cnt);
983984

985+
did_remote = drbd_should_do_remote(mdev->state);
984986
mdev->state.i = ns.i;
987+
should_do_remote = drbd_should_do_remote(mdev->state);
985988
mdev->tconn->susp = ns.susp;
986989
mdev->tconn->susp_nod = ns.susp_nod;
987990
mdev->tconn->susp_fen = ns.susp_fen;
988991

992+
/* put replicated vs not-replicated requests in separate epochs */
993+
if (did_remote != should_do_remote)
994+
start_new_tl_epoch(mdev->tconn);
995+
989996
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
990997
drbd_print_uuids(mdev, "attached to UUIDs");
991998

drivers/block/mtip32xx/mtip32xx.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -626,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data)
626626
}
627627
}
628628

629-
if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
629+
if (cmdto_cnt) {
630630
print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
631-
632-
mtip_restart_port(port);
631+
if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
632+
mtip_restart_port(port);
633+
wake_up_interruptible(&port->svc_wait);
634+
}
633635
clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
634-
wake_up_interruptible(&port->svc_wait);
635636
}
636637

637638
if (port->ic_pause_timer) {
@@ -3887,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd)
38873888
* Delete our gendisk structure. This also removes the device
38883889
* from /dev
38893890
*/
3890-
del_gendisk(dd->disk);
3891+
if (dd->disk) {
3892+
if (dd->disk->queue)
3893+
del_gendisk(dd->disk);
3894+
else
3895+
put_disk(dd->disk);
3896+
}
38913897

38923898
spin_lock(&rssd_index_lock);
38933899
ida_remove(&rssd_index_ida, dd->index);
@@ -3921,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd)
39213927
"Shutting down %s ...\n", dd->disk->disk_name);
39223928

39233929
/* Delete our gendisk structure, and cleanup the blk queue. */
3924-
del_gendisk(dd->disk);
3930+
if (dd->disk) {
3931+
if (dd->disk->queue)
3932+
del_gendisk(dd->disk);
3933+
else
3934+
put_disk(dd->disk);
3935+
}
3936+
39253937

39263938
spin_lock(&rssd_index_lock);
39273939
ida_remove(&rssd_index_ida, dd->index);

drivers/block/xen-blkback/blkback.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
161161
static void make_response(struct xen_blkif *blkif, u64 id,
162162
unsigned short op, int st);
163163

164-
#define foreach_grant(pos, rbtree, node) \
165-
for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
164+
#define foreach_grant_safe(pos, n, rbtree, node) \
165+
for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
166+
(n) = rb_next(&(pos)->node); \
166167
&(pos)->node != NULL; \
167-
(pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
168+
(pos) = container_of(n, typeof(*(pos)), node), \
169+
(n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
168170

169171

170172
static void add_persistent_gnt(struct rb_root *root,
@@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
217219
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
218220
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
219221
struct persistent_gnt *persistent_gnt;
222+
struct rb_node *n;
220223
int ret = 0;
221224
int segs_to_unmap = 0;
222225

223-
foreach_grant(persistent_gnt, root, node) {
226+
foreach_grant_safe(persistent_gnt, n, root, node) {
224227
BUG_ON(persistent_gnt->handle ==
225228
BLKBACK_INVALID_HANDLE);
226229
gnttab_set_unmap_op(&unmap[segs_to_unmap],
@@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
230233
persistent_gnt->handle);
231234

232235
pages[segs_to_unmap] = persistent_gnt->page;
233-
rb_erase(&persistent_gnt->node, root);
234-
kfree(persistent_gnt);
235-
num--;
236236

237237
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
238238
!rb_next(&persistent_gnt->node)) {
@@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
241241
BUG_ON(ret);
242242
segs_to_unmap = 0;
243243
}
244+
245+
rb_erase(&persistent_gnt->node, root);
246+
kfree(persistent_gnt);
247+
num--;
244248
}
245249
BUG_ON(num != 0);
246250
}

drivers/block/xen-blkfront.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
792792
{
793793
struct llist_node *all_gnts;
794794
struct grant *persistent_gnt;
795+
struct llist_node *n;
795796

796797
/* Prevent new requests being issued until we fix things up. */
797798
spin_lock_irq(&info->io_lock);
@@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
804805
/* Remove all persistent grants */
805806
if (info->persistent_gnts_c) {
806807
all_gnts = llist_del_all(&info->persistent_gnts);
807-
llist_for_each_entry(persistent_gnt, all_gnts, node) {
808+
llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
808809
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
809810
__free_page(pfn_to_page(persistent_gnt->pfn));
810811
kfree(persistent_gnt);
@@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
835836
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
836837
struct blkif_response *bret)
837838
{
838-
int i;
839+
int i = 0;
839840
struct bio_vec *bvec;
840841
struct req_iterator iter;
841842
unsigned long flags;
@@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
852853
*/
853854
rq_for_each_segment(bvec, s->request, iter) {
854855
BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
855-
i = offset >> PAGE_SHIFT;
856+
if (bvec->bv_offset < offset)
857+
i++;
856858
BUG_ON(i >= s->req.u.rw.nr_segments);
857859
shared_data = kmap_atomic(
858860
pfn_to_page(s->grants_used[i]->pfn));
@@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
861863
bvec->bv_len);
862864
bvec_kunmap_irq(bvec_data, &flags);
863865
kunmap_atomic(shared_data);
864-
offset += bvec->bv_len;
866+
offset = bvec->bv_offset + bvec->bv_len;
865867
}
866868
}
867869
/* Add the persistent grant into the list of free grants */

include/linux/llist.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,31 @@ static inline void init_llist_head(struct llist_head *list)
124124
&(pos)->member != NULL; \
125125
(pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
126126

127+
/**
128+
* llist_for_each_entry_safe - iterate safely against remove over some entries
129+
* of lock-less list of given type.
130+
* @pos: the type * to use as a loop cursor.
131+
* @n: a struct llist_node * to use as temporary storage.
132+
* @node: the first entry of deleted list entries.
133+
* @member: the name of the llist_node within the struct.
134+
*
135+
* In general, some entries of the lock-less list can be traversed
136+
* safely only after being removed from list, so start with an entry
137+
* instead of list head. This variant allows removal of entries
138+
* as we iterate.
139+
*
140+
* If being used on entries deleted from lock-less list directly, the
141+
* traverse order is from the newest to the oldest added entry. If
142+
* you want to traverse from the oldest to the newest, you must
143+
* reverse the order by yourself before traversing.
144+
*/
145+
#define llist_for_each_entry_safe(pos, n, node, member) \
146+
for ((pos) = llist_entry((node), typeof(*(pos)), member), \
147+
(n) = (pos)->member.next; \
148+
&(pos)->member != NULL; \
149+
(pos) = llist_entry(n, typeof(*(pos)), member), \
150+
(n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
151+
127152
/**
128153
* llist_empty - tests whether a lock-less list is empty
129154
* @head: the list to test

0 commit comments

Comments
 (0)