
Commit f025061

ming1 authored and shligit committed
md: raid10: don't use bio's vec table to manage resync pages
Now we allocate one page array for managing resync pages, instead of using the bio's vec table to do that. The old way is very hacky and won't work any more once multipage bvec is enabled.

The cost introduced is that we need to allocate (128 + 16) * copies bytes per r10_bio, which is fine because the number of in-flight r10_bios for resync shouldn't be large, as pointed out by Shaohua.

The bio_reset() calls in raid10_sync_request() and reshape_request() are also removed, because all bios in these functions are now freshly allocated and no longer need to be reset.

This patch can also be regarded as a cleanup.

Suggested-by: Shaohua Li <shli@kernel.org>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Shaohua Li <shli@fb.com>
1 parent 81fa152 commit f025061
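Note: this diff relies on the 'struct resync_pages' bookkeeping and its resync_*() helpers (resync_alloc_pages(), resync_free_pages(), resync_get_all_pages(), resync_fetch_page()), which are introduced by a companion md patch outside this file and therefore do not appear below. What follows is a minimal sketch of that infrastructure, reconstructed from how raid10.c uses it here; the exact field layout and helper bodies are assumptions, not the upstream definitions. Assuming 4 KiB pages and 64-bit pointers, RESYNC_PAGES is 16, so the pages[] array takes 128 bytes and the bookkeeping fields add roughly 16 more, which is where the "(128 + 16) * copies" per-r10_bio cost estimate in the commit message comes from.

/* Sketch only: per-bio resync page bookkeeping assumed from the companion md patch */
struct resync_pages {
	int		idx;		/* index of the next page to hand out */
	void		*raid_bio;	/* owning r10bio (or r1bio for raid1) */
	struct page	*pages[RESYNC_PAGES];
};

static inline int resync_alloc_pages(struct resync_pages *rp,
				     gfp_t gfp_flags)
{
	int i;

	for (i = 0; i < RESYNC_PAGES; i++) {
		rp->pages[i] = alloc_page(gfp_flags);
		if (!rp->pages[i])
			goto out_free;
	}
	return 0;

out_free:
	while (--i >= 0)
		put_page(rp->pages[i]);
	return -ENOMEM;
}

static inline void resync_free_pages(struct resync_pages *rp)
{
	int i;

	for (i = 0; i < RESYNC_PAGES; i++)
		put_page(rp->pages[i]);
}

/* take an extra reference on every page so a copy of rps[0] can share them */
static inline void resync_get_all_pages(struct resync_pages *rp)
{
	int i;

	for (i = 0; i < RESYNC_PAGES; i++)
		get_page(rp->pages[i]);
}

static inline struct page *resync_fetch_page(struct resync_pages *rp,
					     unsigned idx)
{
	if (WARN_ON_ONCE(idx >= RESYNC_PAGES))
		return NULL;
	return rp->pages[idx];
}

With this in place, each resync bio's ->bi_private points at its resync_pages instead of at the r10bio, the end-IO handlers recover the owning r10bio through get_resync_r10bio(), and the pages fed to the bios come from pages[] rather than being stashed in the bio's own vec table.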

File tree

1 file changed: +82 −52 lines


drivers/md/raid10.c

Lines changed: 82 additions & 52 deletions
@@ -110,6 +110,24 @@ static void end_reshape(struct r10conf *conf);
 #define raid10_log(md, fmt, args...) \
 	do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
 
+/*
+ * 'struct resync_pages' stores actual pages used for doing the resync
+ * IO, and it is per-bio, so make .bi_private point to it.
+ */
+static inline struct resync_pages *get_resync_pages(struct bio *bio)
+{
+	return bio->bi_private;
+}
+
+/*
+ * for resync bio, r10bio pointer can be retrieved from the per-bio
+ * 'struct resync_pages'.
+ */
+static inline struct r10bio *get_resync_r10bio(struct bio *bio)
+{
+	return get_resync_pages(bio)->raid_bio;
+}
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct r10conf *conf = data;
@@ -140,11 +158,11 @@ static void r10bio_pool_free(void *r10_bio, void *data)
 static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct r10conf *conf = data;
-	struct page *page;
 	struct r10bio *r10_bio;
 	struct bio *bio;
-	int i, j;
-	int nalloc;
+	int j;
+	int nalloc, nalloc_rp;
+	struct resync_pages *rps;
 
 	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
 	if (!r10_bio)
@@ -156,6 +174,15 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 	else
 		nalloc = 2; /* recovery */
 
+	/* allocate once for all bios */
+	if (!conf->have_replacement)
+		nalloc_rp = nalloc;
+	else
+		nalloc_rp = nalloc * 2;
+	rps = kmalloc(sizeof(struct resync_pages) * nalloc_rp, gfp_flags);
+	if (!rps)
+		goto out_free_r10bio;
+
 	/*
 	 * Allocate bios.
 	 */
@@ -175,36 +202,40 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 	 * Allocate RESYNC_PAGES data pages and attach them
 	 * where needed.
 	 */
-	for (j = 0 ; j < nalloc; j++) {
+	for (j = 0; j < nalloc; j++) {
 		struct bio *rbio = r10_bio->devs[j].repl_bio;
+		struct resync_pages *rp, *rp_repl;
+
+		rp = &rps[j];
+		if (rbio)
+			rp_repl = &rps[nalloc + j];
+
 		bio = r10_bio->devs[j].bio;
-		for (i = 0; i < RESYNC_PAGES; i++) {
-			if (j > 0 && !test_bit(MD_RECOVERY_SYNC,
-					       &conf->mddev->recovery)) {
-				/* we can share bv_page's during recovery
-				 * and reshape */
-				struct bio *rbio = r10_bio->devs[0].bio;
-				page = rbio->bi_io_vec[i].bv_page;
-				get_page(page);
-			} else
-				page = alloc_page(gfp_flags);
-			if (unlikely(!page))
+
+		if (!j || test_bit(MD_RECOVERY_SYNC,
+				   &conf->mddev->recovery)) {
+			if (resync_alloc_pages(rp, gfp_flags))
 				goto out_free_pages;
+		} else {
+			memcpy(rp, &rps[0], sizeof(*rp));
+			resync_get_all_pages(rp);
+		}
 
-			bio->bi_io_vec[i].bv_page = page;
-			if (rbio)
-				rbio->bi_io_vec[i].bv_page = page;
+		rp->idx = 0;
+		rp->raid_bio = r10_bio;
+		bio->bi_private = rp;
+		if (rbio) {
+			memcpy(rp_repl, rp, sizeof(*rp));
+			rbio->bi_private = rp_repl;
 		}
 	}
 
 	return r10_bio;
 
 out_free_pages:
-	for ( ; i > 0 ; i--)
-		safe_put_page(bio->bi_io_vec[i-1].bv_page);
-	while (j--)
-		for (i = 0; i < RESYNC_PAGES ; i++)
-			safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
+	while (--j >= 0)
+		resync_free_pages(&rps[j * 2]);
+
 	j = 0;
 out_free_bio:
 	for ( ; j < nalloc; j++) {
@@ -213,30 +244,34 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 		if (r10_bio->devs[j].repl_bio)
 			bio_put(r10_bio->devs[j].repl_bio);
 	}
+	kfree(rps);
+out_free_r10bio:
 	r10bio_pool_free(r10_bio, conf);
 	return NULL;
 }
 
 static void r10buf_pool_free(void *__r10_bio, void *data)
 {
-	int i;
 	struct r10conf *conf = data;
 	struct r10bio *r10bio = __r10_bio;
 	int j;
+	struct resync_pages *rp = NULL;
 
-	for (j=0; j < conf->copies; j++) {
+	for (j = conf->copies; j--; ) {
 		struct bio *bio = r10bio->devs[j].bio;
-		if (bio) {
-			for (i = 0; i < RESYNC_PAGES; i++) {
-				safe_put_page(bio->bi_io_vec[i].bv_page);
-				bio->bi_io_vec[i].bv_page = NULL;
-			}
-			bio_put(bio);
-		}
+
+		rp = get_resync_pages(bio);
+		resync_free_pages(rp);
+		bio_put(bio);
+
 		bio = r10bio->devs[j].repl_bio;
 		if (bio)
 			bio_put(bio);
 	}
+
+	/* resync pages array stored in the 1st bio's .bi_private */
+	kfree(rp);
+
 	r10bio_pool_free(r10bio, conf);
 }
 
@@ -1917,7 +1952,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
 
 static void end_sync_read(struct bio *bio)
 {
-	struct r10bio *r10_bio = bio->bi_private;
+	struct r10bio *r10_bio = get_resync_r10bio(bio);
 	struct r10conf *conf = r10_bio->mddev->private;
 	int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
 
@@ -1926,6 +1961,7 @@ static void end_sync_read(struct bio *bio)
 
 static void end_reshape_read(struct bio *bio)
 {
+	/* reshape read bio isn't allocated from r10buf_pool */
 	struct r10bio *r10_bio = bio->bi_private;
 
 	__end_sync_read(r10_bio, bio, r10_bio->read_slot);
@@ -1960,7 +1996,7 @@ static void end_sync_request(struct r10bio *r10_bio)
 
 static void end_sync_write(struct bio *bio)
 {
-	struct r10bio *r10_bio = bio->bi_private;
+	struct r10bio *r10_bio = get_resync_r10bio(bio);
 	struct mddev *mddev = r10_bio->mddev;
 	struct r10conf *conf = mddev->private;
 	int d;
@@ -2040,6 +2076,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	for (i=0 ; i < conf->copies ; i++) {
 		int j, d;
 		struct md_rdev *rdev;
+		struct resync_pages *rp;
 
 		tbio = r10_bio->devs[i].bio;
 
@@ -2081,11 +2118,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		 * First we need to fixup bv_offset, bv_len and
 		 * bi_vecs, as the read request might have corrupted these
 		 */
+		rp = get_resync_pages(tbio);
 		bio_reset(tbio);
 
 		tbio->bi_vcnt = vcnt;
 		tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
-		tbio->bi_private = r10_bio;
+		rp->raid_bio = r10_bio;
+		tbio->bi_private = rp;
 		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 		tbio->bi_end_io = end_sync_write;
 		bio_set_op_attrs(tbio, REQ_OP_WRITE, 0);
@@ -3149,10 +3188,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				}
 			}
 			bio = r10_bio->devs[0].bio;
-			bio_reset(bio);
 			bio->bi_next = biolist;
 			biolist = bio;
-			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 			if (test_bit(FailFast, &rdev->flags))
@@ -3176,10 +3213,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 			if (!test_bit(In_sync, &mrdev->flags)) {
 				bio = r10_bio->devs[1].bio;
-				bio_reset(bio);
 				bio->bi_next = biolist;
 				biolist = bio;
-				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_write;
 				bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 				bio->bi_iter.bi_sector = to_addr
@@ -3204,10 +3239,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			if (mreplace == NULL || bio == NULL ||
 			    test_bit(Faulty, &mreplace->flags))
 				break;
-			bio_reset(bio);
 			bio->bi_next = biolist;
 			biolist = bio;
-			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_write;
 			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 			bio->bi_iter.bi_sector = to_addr +
@@ -3329,7 +3362,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
 			bio = r10_bio->devs[i].bio;
-			bio_reset(bio);
 			bio->bi_error = -EIO;
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
@@ -3354,7 +3386,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			atomic_inc(&r10_bio->remaining);
 			bio->bi_next = biolist;
 			biolist = bio;
-			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 			if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
@@ -3373,13 +3404,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 			/* Need to set up for writing to the replacement */
 			bio = r10_bio->devs[i].repl_bio;
-			bio_reset(bio);
 			bio->bi_error = -EIO;
 
 			sector = r10_bio->devs[i].addr;
 			bio->bi_next = biolist;
 			biolist = bio;
-			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_write;
 			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 			if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
@@ -3418,7 +3447,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 		if (len == 0)
 			break;
 		for (bio= biolist ; bio ; bio=bio->bi_next) {
-			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
+			struct resync_pages *rp = get_resync_pages(bio);
+			page = resync_fetch_page(rp, rp->idx++);
 			/*
 			 * won't fail because the vec table is big enough
 			 * to hold all these pages
@@ -3427,15 +3457,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 		}
 		nr_sectors += len>>9;
 		sector_nr += len>>9;
-	} while (biolist->bi_vcnt < RESYNC_PAGES);
+	} while (get_resync_pages(biolist)->idx < RESYNC_PAGES);
 	r10_bio->sectors = nr_sectors;
 
 	while (biolist) {
 		bio = biolist;
 		biolist = biolist->bi_next;
 
 		bio->bi_next = NULL;
-		r10_bio = bio->bi_private;
+		r10_bio = get_resync_r10bio(bio);
 		r10_bio->sectors = nr_sectors;
 
 		if (bio->bi_end_io == end_sync_read) {
@@ -4326,6 +4356,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	struct bio *blist;
 	struct bio *bio, *read_bio;
 	int sectors_done = 0;
+	struct page **pages;
 
 	if (sector_nr == 0) {
 		/* If restarting in the middle, skip the initial sectors */
@@ -4476,11 +4507,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 		if (!rdev2 || test_bit(Faulty, &rdev2->flags))
 			continue;
 
-		bio_reset(b);
 		b->bi_bdev = rdev2->bdev;
 		b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
 			rdev2->new_data_offset;
-		b->bi_private = r10_bio;
 		b->bi_end_io = end_reshape_write;
 		bio_set_op_attrs(b, REQ_OP_WRITE, 0);
 		b->bi_next = blist;
@@ -4490,8 +4519,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	/* Now add as many pages as possible to all of these bios. */
 
 	nr_sectors = 0;
+	pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
 	for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
-		struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page;
+		struct page *page = pages[s / (PAGE_SIZE >> 9)];
 		int len = (max_sectors - s) << 9;
 		if (len > PAGE_SIZE)
 			len = PAGE_SIZE;
@@ -4675,7 +4705,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
 
 static void end_reshape_write(struct bio *bio)
 {
-	struct r10bio *r10_bio = bio->bi_private;
+	struct r10bio *r10_bio = get_resync_r10bio(bio);
 	struct mddev *mddev = r10_bio->mddev;
 	struct r10conf *conf = mddev->private;
 	int d;
