Skip to content

Commit 1d838d7

Browse files
committed
Merge tag 'md-3.7-fixes' of git://neil.brown.name/md
Pull md fixes from NeilBrown:
 "Several bug fixes for md in 3.7:

   - raid5 discard has problems
   - raid10 replacement devices have problems
   - bad block lock seqlock usage has problems
   - dm-raid doesn't free everything"

* tag 'md-3.7-fixes' of git://neil.brown.name/md:
  md/raid10: decrement correct pending counter when writing to replacement.
  md/raid10: close race that lose writes lost when replacement completes.
  md/raid5: Make sure we clear R5_Discard when discard is finished.
  md/raid5: move resolving of reconstruct_state earlier in stripe_handle.
  md/raid5: round discard alignment up to power of 2.
  md: make sure everything is freed when dm-raid stops an array.
  md: Avoid write invalid address if read_seqretry returned true.
  md: Reassigned the parameters if read_seqretry returned true in func md_is_badblock.
2 parents a8946af + 884162d commit 1d838d7

File tree

3 files changed

+132
-105
lines changed

3 files changed

+132
-105
lines changed

drivers/md/md.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,10 +1817,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
18171817
memset(bbp, 0xff, PAGE_SIZE);
18181818

18191819
for (i = 0 ; i < bb->count ; i++) {
1820-
u64 internal_bb = *p++;
1820+
u64 internal_bb = p[i];
18211821
u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
18221822
| BB_LEN(internal_bb));
1823-
*bbp++ = cpu_to_le64(store_bb);
1823+
bbp[i] = cpu_to_le64(store_bb);
18241824
}
18251825
bb->changed = 0;
18261826
if (read_seqretry(&bb->lock, seq))
@@ -5294,7 +5294,7 @@ void md_stop_writes(struct mddev *mddev)
52945294
}
52955295
EXPORT_SYMBOL_GPL(md_stop_writes);
52965296

5297-
void md_stop(struct mddev *mddev)
5297+
static void __md_stop(struct mddev *mddev)
52985298
{
52995299
mddev->ready = 0;
53005300
mddev->pers->stop(mddev);
@@ -5304,6 +5304,18 @@ void md_stop(struct mddev *mddev)
53045304
mddev->pers = NULL;
53055305
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
53065306
}
5307+
5308+
void md_stop(struct mddev *mddev)
5309+
{
5310+
/* stop the array and free an attached data structures.
5311+
* This is called from dm-raid
5312+
*/
5313+
__md_stop(mddev);
5314+
bitmap_destroy(mddev);
5315+
if (mddev->bio_set)
5316+
bioset_free(mddev->bio_set);
5317+
}
5318+
53075319
EXPORT_SYMBOL_GPL(md_stop);
53085320

53095321
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
@@ -5364,7 +5376,7 @@ static int do_md_stop(struct mddev * mddev, int mode,
53645376
set_disk_ro(disk, 0);
53655377

53665378
__md_stop_writes(mddev);
5367-
md_stop(mddev);
5379+
__md_stop(mddev);
53685380
mddev->queue->merge_bvec_fn = NULL;
53695381
mddev->queue->backing_dev_info.congested_fn = NULL;
53705382

@@ -7936,9 +7948,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
79367948
sector_t *first_bad, int *bad_sectors)
79377949
{
79387950
int hi;
7939-
int lo = 0;
7951+
int lo;
79407952
u64 *p = bb->page;
7941-
int rv = 0;
7953+
int rv;
79427954
sector_t target = s + sectors;
79437955
unsigned seq;
79447956

@@ -7953,7 +7965,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
79537965

79547966
retry:
79557967
seq = read_seqbegin(&bb->lock);
7956-
7968+
lo = 0;
7969+
rv = 0;
79577970
hi = bb->count;
79587971

79597972
/* Binary search between lo and hi for 'target'

drivers/md/raid10.c

Lines changed: 69 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
499499
*/
500500
one_write_done(r10_bio);
501501
if (dec_rdev)
502-
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
502+
rdev_dec_pending(rdev, conf->mddev);
503503
}
504504

505505
/*
@@ -1334,18 +1334,21 @@ static void make_request(struct mddev *mddev, struct bio * bio)
13341334
blocked_rdev = rrdev;
13351335
break;
13361336
}
1337+
if (rdev && (test_bit(Faulty, &rdev->flags)
1338+
|| test_bit(Unmerged, &rdev->flags)))
1339+
rdev = NULL;
13371340
if (rrdev && (test_bit(Faulty, &rrdev->flags)
13381341
|| test_bit(Unmerged, &rrdev->flags)))
13391342
rrdev = NULL;
13401343

13411344
r10_bio->devs[i].bio = NULL;
13421345
r10_bio->devs[i].repl_bio = NULL;
1343-
if (!rdev || test_bit(Faulty, &rdev->flags) ||
1344-
test_bit(Unmerged, &rdev->flags)) {
1346+
1347+
if (!rdev && !rrdev) {
13451348
set_bit(R10BIO_Degraded, &r10_bio->state);
13461349
continue;
13471350
}
1348-
if (test_bit(WriteErrorSeen, &rdev->flags)) {
1351+
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
13491352
sector_t first_bad;
13501353
sector_t dev_sector = r10_bio->devs[i].addr;
13511354
int bad_sectors;
@@ -1387,8 +1390,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
13871390
max_sectors = good_sectors;
13881391
}
13891392
}
1390-
r10_bio->devs[i].bio = bio;
1391-
atomic_inc(&rdev->nr_pending);
1393+
if (rdev) {
1394+
r10_bio->devs[i].bio = bio;
1395+
atomic_inc(&rdev->nr_pending);
1396+
}
13921397
if (rrdev) {
13931398
r10_bio->devs[i].repl_bio = bio;
13941399
atomic_inc(&rrdev->nr_pending);
@@ -1444,69 +1449,71 @@ static void make_request(struct mddev *mddev, struct bio * bio)
14441449
for (i = 0; i < conf->copies; i++) {
14451450
struct bio *mbio;
14461451
int d = r10_bio->devs[i].devnum;
1447-
if (!r10_bio->devs[i].bio)
1448-
continue;
1452+
if (r10_bio->devs[i].bio) {
1453+
struct md_rdev *rdev = conf->mirrors[d].rdev;
1454+
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1455+
md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
1456+
max_sectors);
1457+
r10_bio->devs[i].bio = mbio;
1458+
1459+
mbio->bi_sector = (r10_bio->devs[i].addr+
1460+
choose_data_offset(r10_bio,
1461+
rdev));
1462+
mbio->bi_bdev = rdev->bdev;
1463+
mbio->bi_end_io = raid10_end_write_request;
1464+
mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
1465+
mbio->bi_private = r10_bio;
14491466

1450-
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1451-
md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
1452-
max_sectors);
1453-
r10_bio->devs[i].bio = mbio;
1467+
atomic_inc(&r10_bio->remaining);
14541468

1455-
mbio->bi_sector = (r10_bio->devs[i].addr+
1456-
choose_data_offset(r10_bio,
1457-
conf->mirrors[d].rdev));
1458-
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
1459-
mbio->bi_end_io = raid10_end_write_request;
1460-
mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
1461-
mbio->bi_private = r10_bio;
1469+
cb = blk_check_plugged(raid10_unplug, mddev,
1470+
sizeof(*plug));
1471+
if (cb)
1472+
plug = container_of(cb, struct raid10_plug_cb,
1473+
cb);
1474+
else
1475+
plug = NULL;
1476+
spin_lock_irqsave(&conf->device_lock, flags);
1477+
if (plug) {
1478+
bio_list_add(&plug->pending, mbio);
1479+
plug->pending_cnt++;
1480+
} else {
1481+
bio_list_add(&conf->pending_bio_list, mbio);
1482+
conf->pending_count++;
1483+
}
1484+
spin_unlock_irqrestore(&conf->device_lock, flags);
1485+
if (!plug)
1486+
md_wakeup_thread(mddev->thread);
1487+
}
14621488

1463-
atomic_inc(&r10_bio->remaining);
1489+
if (r10_bio->devs[i].repl_bio) {
1490+
struct md_rdev *rdev = conf->mirrors[d].replacement;
1491+
if (rdev == NULL) {
1492+
/* Replacement just got moved to main 'rdev' */
1493+
smp_mb();
1494+
rdev = conf->mirrors[d].rdev;
1495+
}
1496+
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1497+
md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
1498+
max_sectors);
1499+
r10_bio->devs[i].repl_bio = mbio;
1500+
1501+
mbio->bi_sector = (r10_bio->devs[i].addr +
1502+
choose_data_offset(
1503+
r10_bio, rdev));
1504+
mbio->bi_bdev = rdev->bdev;
1505+
mbio->bi_end_io = raid10_end_write_request;
1506+
mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
1507+
mbio->bi_private = r10_bio;
14641508

1465-
cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
1466-
if (cb)
1467-
plug = container_of(cb, struct raid10_plug_cb, cb);
1468-
else
1469-
plug = NULL;
1470-
spin_lock_irqsave(&conf->device_lock, flags);
1471-
if (plug) {
1472-
bio_list_add(&plug->pending, mbio);
1473-
plug->pending_cnt++;
1474-
} else {
1509+
atomic_inc(&r10_bio->remaining);
1510+
spin_lock_irqsave(&conf->device_lock, flags);
14751511
bio_list_add(&conf->pending_bio_list, mbio);
14761512
conf->pending_count++;
1513+
spin_unlock_irqrestore(&conf->device_lock, flags);
1514+
if (!mddev_check_plugged(mddev))
1515+
md_wakeup_thread(mddev->thread);
14771516
}
1478-
spin_unlock_irqrestore(&conf->device_lock, flags);
1479-
if (!plug)
1480-
md_wakeup_thread(mddev->thread);
1481-
1482-
if (!r10_bio->devs[i].repl_bio)
1483-
continue;
1484-
1485-
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1486-
md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
1487-
max_sectors);
1488-
r10_bio->devs[i].repl_bio = mbio;
1489-
1490-
/* We are actively writing to the original device
1491-
* so it cannot disappear, so the replacement cannot
1492-
* become NULL here
1493-
*/
1494-
mbio->bi_sector = (r10_bio->devs[i].addr +
1495-
choose_data_offset(
1496-
r10_bio,
1497-
conf->mirrors[d].replacement));
1498-
mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
1499-
mbio->bi_end_io = raid10_end_write_request;
1500-
mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
1501-
mbio->bi_private = r10_bio;
1502-
1503-
atomic_inc(&r10_bio->remaining);
1504-
spin_lock_irqsave(&conf->device_lock, flags);
1505-
bio_list_add(&conf->pending_bio_list, mbio);
1506-
conf->pending_count++;
1507-
spin_unlock_irqrestore(&conf->device_lock, flags);
1508-
if (!mddev_check_plugged(mddev))
1509-
md_wakeup_thread(mddev->thread);
15101517
}
15111518

15121519
/* Don't remove the bias on 'remaining' (one_write_done) until

drivers/md/raid5.c

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
27742774
dev = &sh->dev[i];
27752775
if (!test_bit(R5_LOCKED, &dev->flags) &&
27762776
(test_bit(R5_UPTODATE, &dev->flags) ||
2777-
test_and_clear_bit(R5_Discard, &dev->flags))) {
2777+
test_bit(R5_Discard, &dev->flags))) {
27782778
/* We can return any write requests */
27792779
struct bio *wbi, *wbi2;
27802780
pr_debug("Return write for disc %d\n", i);
2781+
if (test_and_clear_bit(R5_Discard, &dev->flags))
2782+
clear_bit(R5_UPTODATE, &dev->flags);
27812783
wbi = dev->written;
27822784
dev->written = NULL;
27832785
while (wbi && wbi->bi_sector <
@@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
27952797
!test_bit(STRIPE_DEGRADED, &sh->state),
27962798
0);
27972799
}
2798-
}
2800+
} else if (test_bit(R5_Discard, &sh->dev[i].flags))
2801+
clear_bit(R5_Discard, &sh->dev[i].flags);
27992802

28002803
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
28012804
if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3490,40 +3493,6 @@ static void handle_stripe(struct stripe_head *sh)
34903493
handle_failed_sync(conf, sh, &s);
34913494
}
34923495

3493-
/*
3494-
* might be able to return some write requests if the parity blocks
3495-
* are safe, or on a failed drive
3496-
*/
3497-
pdev = &sh->dev[sh->pd_idx];
3498-
s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
3499-
|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
3500-
qdev = &sh->dev[sh->qd_idx];
3501-
s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
3502-
|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
3503-
|| conf->level < 6;
3504-
3505-
if (s.written &&
3506-
(s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
3507-
&& !test_bit(R5_LOCKED, &pdev->flags)
3508-
&& (test_bit(R5_UPTODATE, &pdev->flags) ||
3509-
test_bit(R5_Discard, &pdev->flags))))) &&
3510-
(s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
3511-
&& !test_bit(R5_LOCKED, &qdev->flags)
3512-
&& (test_bit(R5_UPTODATE, &qdev->flags) ||
3513-
test_bit(R5_Discard, &qdev->flags))))))
3514-
handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
3515-
3516-
/* Now we might consider reading some blocks, either to check/generate
3517-
* parity, or to satisfy requests
3518-
* or to load a block that is being partially written.
3519-
*/
3520-
if (s.to_read || s.non_overwrite
3521-
|| (conf->level == 6 && s.to_write && s.failed)
3522-
|| (s.syncing && (s.uptodate + s.compute < disks))
3523-
|| s.replacing
3524-
|| s.expanding)
3525-
handle_stripe_fill(sh, &s, disks);
3526-
35273496
/* Now we check to see if any write operations have recently
35283497
* completed
35293498
*/
@@ -3561,6 +3530,40 @@ static void handle_stripe(struct stripe_head *sh)
35613530
s.dec_preread_active = 1;
35623531
}
35633532

3533+
/*
3534+
* might be able to return some write requests if the parity blocks
3535+
* are safe, or on a failed drive
3536+
*/
3537+
pdev = &sh->dev[sh->pd_idx];
3538+
s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
3539+
|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
3540+
qdev = &sh->dev[sh->qd_idx];
3541+
s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
3542+
|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
3543+
|| conf->level < 6;
3544+
3545+
if (s.written &&
3546+
(s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
3547+
&& !test_bit(R5_LOCKED, &pdev->flags)
3548+
&& (test_bit(R5_UPTODATE, &pdev->flags) ||
3549+
test_bit(R5_Discard, &pdev->flags))))) &&
3550+
(s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
3551+
&& !test_bit(R5_LOCKED, &qdev->flags)
3552+
&& (test_bit(R5_UPTODATE, &qdev->flags) ||
3553+
test_bit(R5_Discard, &qdev->flags))))))
3554+
handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
3555+
3556+
/* Now we might consider reading some blocks, either to check/generate
3557+
* parity, or to satisfy requests
3558+
* or to load a block that is being partially written.
3559+
*/
3560+
if (s.to_read || s.non_overwrite
3561+
|| (conf->level == 6 && s.to_write && s.failed)
3562+
|| (s.syncing && (s.uptodate + s.compute < disks))
3563+
|| s.replacing
3564+
|| s.expanding)
3565+
handle_stripe_fill(sh, &s, disks);
3566+
35643567
/* Now to consider new write requests and what else, if anything
35653568
* should be read. We do not handle new writes when:
35663569
* 1/ A 'write' operation (copy+xor) is already in flight.
@@ -5529,6 +5532,10 @@ static int run(struct mddev *mddev)
55295532
* discard data disk but write parity disk
55305533
*/
55315534
stripe = stripe * PAGE_SIZE;
5535+
/* Round up to power of 2, as discard handling
5536+
* currently assumes that */
5537+
while ((stripe-1) & stripe)
5538+
stripe = (stripe | (stripe-1)) + 1;
55325539
mddev->queue->limits.discard_alignment = stripe;
55335540
mddev->queue->limits.discard_granularity = stripe;
55345541
/*

0 commit comments

Comments
 (0)