Skip to content

Commit 483cbbe

Browse files
Alexei Naberezhnov authored and liu-song-6 committed
md/raid5: fix 'out of memory' during raid cache recovery
This fixes the case where md array assembly fails because raid cache recovery is unable to allocate a stripe, despite attempts to replay stripes and increase the cache size. This happens because stripes released by r5c_recovery_replay_stripes and raid5_set_cache_size do not become available for allocation immediately. Released stripes are first placed on the conf->released_stripes list and require the md thread to merge them onto conf->inactive_list before they can be allocated. This patch allows the final allocation attempt during cache recovery to wait for new stripes to become available for allocation.

Cc: linux-raid@vger.kernel.org
Cc: Shaohua Li <shli@kernel.org>
Cc: linux-stable <stable@vger.kernel.org> # 4.10+
Fixes: b4c625c ("md/r5cache: r5cache recovery: part 1")
Signed-off-by: Alexei Naberezhnov <anaberezhnov@fb.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
1 parent f17b5f0 commit 483cbbe

File tree

2 files changed

+28
-13
lines changed

2 files changed

+28
-13
lines changed

drivers/md/raid5-cache.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,12 +1935,14 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
19351935
}
19361936

19371937
static struct stripe_head *
1938-
r5c_recovery_alloc_stripe(struct r5conf *conf,
1939-
sector_t stripe_sect)
1938+
r5c_recovery_alloc_stripe(
1939+
struct r5conf *conf,
1940+
sector_t stripe_sect,
1941+
int noblock)
19401942
{
19411943
struct stripe_head *sh;
19421944

1943-
sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
1945+
sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
19441946
if (!sh)
19451947
return NULL; /* no more stripe available */
19461948

@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
21502152
stripe_sect);
21512153

21522154
if (!sh) {
2153-
sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
2155+
sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
21542156
/*
21552157
* cannot get stripe from raid5_get_active_stripe
21562158
* try replay some stripes
@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
21592161
r5c_recovery_replay_stripes(
21602162
cached_stripe_list, ctx);
21612163
sh = r5c_recovery_alloc_stripe(
2162-
conf, stripe_sect);
2164+
conf, stripe_sect, 1);
21632165
}
21642166
if (!sh) {
2167+
int new_size = conf->min_nr_stripes * 2;
21652168
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
21662169
mdname(mddev),
2167-
conf->min_nr_stripes * 2);
2168-
raid5_set_cache_size(mddev,
2169-
conf->min_nr_stripes * 2);
2170-
sh = r5c_recovery_alloc_stripe(conf,
2171-
stripe_sect);
2170+
new_size);
2171+
ret = raid5_set_cache_size(mddev, new_size);
2172+
if (conf->min_nr_stripes <= new_size / 2) {
2173+
pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
2174+
mdname(mddev),
2175+
ret,
2176+
new_size,
2177+
conf->min_nr_stripes,
2178+
conf->max_nr_stripes);
2179+
return -ENOMEM;
2180+
}
2181+
sh = r5c_recovery_alloc_stripe(
2182+
conf, stripe_sect, 0);
21722183
}
21732184
if (!sh) {
21742185
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
2175-
mdname(mddev));
2186+
mdname(mddev));
21762187
return -ENOMEM;
21772188
}
21782189
list_add_tail(&sh->lru, cached_stripe_list);

drivers/md/raid5.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
63696369
int
63706370
raid5_set_cache_size(struct mddev *mddev, int size)
63716371
{
6372+
int result = 0;
63726373
struct r5conf *conf = mddev->private;
63736374

63746375
if (size <= 16 || size > 32768)
@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
63856386

63866387
mutex_lock(&conf->cache_size_mutex);
63876388
while (size > conf->max_nr_stripes)
6388-
if (!grow_one_stripe(conf, GFP_KERNEL))
6389+
if (!grow_one_stripe(conf, GFP_KERNEL)) {
6390+
conf->min_nr_stripes = conf->max_nr_stripes;
6391+
result = -ENOMEM;
63896392
break;
6393+
}
63906394
mutex_unlock(&conf->cache_size_mutex);
63916395

6392-
return 0;
6396+
return result;
63936397
}
63946398
EXPORT_SYMBOL(raid5_set_cache_size);
63956399

0 commit comments

Comments
 (0)