Skip to content

Commit 85ad1d1

Browse files
GuoqingJiang authored and shligit committed
md: set MD_CHANGE_PENDING in an atomic region
Some code waits for a metadata update by:

1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
2. setting MD_CHANGE_PENDING and waking the management thread
3. waiting for MD_CHANGE_PENDING to be cleared

If the first two are done without locking, the code in md_update_sb() which checks if it needs to repeat might test if an update is needed before step 1, then clear MD_CHANGE_PENDING after step 2, resulting in the wait returning early.

So make sure all places that set MD_CHANGE_PENDING do so atomically; bit_clear_unless (suggested by Neil) is introduced for this purpose.

Cc: Martin Kepplinger <martink@posteo.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: <linux-kernel@vger.kernel.org>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
1 parent fe67d19 commit 85ad1d1

File tree

6 files changed

+40
-23
lines changed

6 files changed

+40
-23
lines changed

drivers/md/md.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2295,12 +2295,16 @@ void md_update_sb(struct mddev *mddev, int force_change)
22952295
if (mddev_is_clustered(mddev)) {
22962296
if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
22972297
force_change = 1;
2298+
if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2299+
nospares = 1;
22982300
ret = md_cluster_ops->metadata_update_start(mddev);
22992301
/* Has someone else has updated the sb */
23002302
if (!does_sb_need_changing(mddev)) {
23012303
if (ret == 0)
23022304
md_cluster_ops->metadata_update_cancel(mddev);
2303-
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2305+
bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2306+
BIT(MD_CHANGE_DEVS) |
2307+
BIT(MD_CHANGE_CLEAN));
23042308
return;
23052309
}
23062310
}
@@ -2434,15 +2438,11 @@ void md_update_sb(struct mddev *mddev, int force_change)
24342438
if (mddev_is_clustered(mddev) && ret == 0)
24352439
md_cluster_ops->metadata_update_finish(mddev);
24362440

2437-
spin_lock(&mddev->lock);
24382441
if (mddev->in_sync != sync_req ||
2439-
test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2442+
!bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2443+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
24402444
/* have to write it out again */
2441-
spin_unlock(&mddev->lock);
24422445
goto repeat;
2443-
}
2444-
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2445-
spin_unlock(&mddev->lock);
24462446
wake_up(&mddev->sb_wait);
24472447
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
24482448
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -8147,18 +8147,18 @@ void md_do_sync(struct md_thread *thread)
81478147
}
81488148
}
81498149
skip:
8150-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
8151-
81528150
if (mddev_is_clustered(mddev) &&
81538151
ret == 0) {
81548152
/* set CHANGE_PENDING here since maybe another
81558153
* update is needed, so other nodes are informed */
8156-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
8154+
set_mask_bits(&mddev->flags, 0,
8155+
BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
81578156
md_wakeup_thread(mddev->thread);
81588157
wait_event(mddev->sb_wait,
81598158
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
81608159
md_cluster_ops->resync_finish(mddev);
8161-
}
8160+
} else
8161+
set_bit(MD_CHANGE_DEVS, &mddev->flags);
81628162

81638163
spin_lock(&mddev->lock);
81648164
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8550,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
85508550
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
85518551
int is_new)
85528552
{
8553+
struct mddev *mddev = rdev->mddev;
85538554
int rv;
85548555
if (is_new)
85558556
s += rdev->new_data_offset;
@@ -8559,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
85598560
if (rv == 0) {
85608561
/* Make sure they get written out promptly */
85618562
sysfs_notify_dirent_safe(rdev->sysfs_state);
8562-
set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8563-
set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
8563+
set_mask_bits(&mddev->flags, 0,
8564+
BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
85648565
md_wakeup_thread(rdev->mddev->thread);
85658566
return 1;
85668567
} else

drivers/md/raid1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,8 +1474,8 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
14741474
* if recovery is running, make sure it aborts.
14751475
*/
14761476
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1477-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
1478-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
1477+
set_mask_bits(&mddev->flags, 0,
1478+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
14791479
printk(KERN_ALERT
14801480
"md/raid1:%s: Disk failure on %s, disabling device.\n"
14811481
"md/raid1:%s: Operation continuing on %d devices.\n",

drivers/md/raid10.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,8 +1102,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
11021102
bio->bi_iter.bi_sector < conf->reshape_progress))) {
11031103
/* Need to update reshape_position in metadata */
11041104
mddev->reshape_position = conf->reshape_progress;
1105-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
1106-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
1105+
set_mask_bits(&mddev->flags, 0,
1106+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
11071107
md_wakeup_thread(mddev->thread);
11081108
wait_event(mddev->sb_wait,
11091109
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -1591,8 +1591,8 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
15911591
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
15921592
set_bit(Blocked, &rdev->flags);
15931593
set_bit(Faulty, &rdev->flags);
1594-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
1595-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
1594+
set_mask_bits(&mddev->flags, 0,
1595+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
15961596
spin_unlock_irqrestore(&conf->device_lock, flags);
15971597
printk(KERN_ALERT
15981598
"md/raid10:%s: Disk failure on %s, disabling device.\n"

drivers/md/raid5-cache.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,8 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
712712
* in_teardown check workaround this issue.
713713
*/
714714
if (!log->in_teardown) {
715-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
716-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
715+
set_mask_bits(&mddev->flags, 0,
716+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
717717
md_wakeup_thread(mddev->thread);
718718
wait_event(mddev->sb_wait,
719719
!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||

drivers/md/raid5.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2514,8 +2514,8 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
25142514

25152515
set_bit(Blocked, &rdev->flags);
25162516
set_bit(Faulty, &rdev->flags);
2517-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
2518-
set_bit(MD_CHANGE_PENDING, &mddev->flags);
2517+
set_mask_bits(&mddev->flags, 0,
2518+
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
25192519
printk(KERN_ALERT
25202520
"md/raid:%s: Disk failure on %s, disabling device.\n"
25212521
"md/raid:%s: Operation continuing on %d devices.\n",

include/linux/bitops.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,22 @@ static inline unsigned long __ffs64(u64 word)
227227
})
228228
#endif
229229

230+
#ifndef bit_clear_unless
231+
#define bit_clear_unless(ptr, _clear, _test) \
232+
({ \
233+
const typeof(*ptr) clear = (_clear), test = (_test); \
234+
typeof(*ptr) old, new; \
235+
\
236+
do { \
237+
old = ACCESS_ONCE(*ptr); \
238+
new = old & ~clear; \
239+
} while (!(old & test) && \
240+
cmpxchg(ptr, old, new) != old); \
241+
\
242+
!(old & test); \
243+
})
244+
#endif
245+
230246
#ifndef find_last_bit
231247
/**
232248
* find_last_bit - find the last set bit in a memory region

0 commit comments

Comments
 (0)