Skip to content

Commit 8a39262

Browse files
committed
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (52 commits) md: Protect access to mddev->disks list using RCU md: only count actual openers as access which prevent a 'stop' md: linear: Make array_size sector-based and rename it to array_sectors. md: Make mddev->array_size sector-based. md: Make super_type->rdev_size_change() take sector-based sizes. md: Fix check for overlapping devices. md: Tidy up rdev_size_store a bit: md: Remove some unused macros. md: Turn rdev->sb_offset into a sector-based quantity. md: Make calc_dev_sboffset() return a sector count. md: Replace calc_dev_size() by calc_num_sectors(). md: Make update_size() take the number of sectors. md: Better control of when do_md_stop is allowed to stop the array. md: get_disk_info(): Don't convert between signed and unsigned and back. md: Simplify restart_array(). md: alloc_disk_sb(): Return proper error value. md: Simplify sb_equal(). md: Simplify uuid_equal(). md: sb_equal(): Fix misleading printk. md: Fix a typo in the comment to cmd_match(). ...
2 parents 519f014 + 4b80991 commit 8a39262

File tree

16 files changed

+842
-790
lines changed

16 files changed

+842
-790
lines changed

Documentation/md.txt

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ All md devices contain:
236236
writing the word for the desired state, however some states
237237
cannot be explicitly set, and some transitions are not allowed.
238238

239+
Select/poll works on this file. All changes except between
240+
active_idle and active (which can be frequent and are not
241+
very interesting) are notified. active->active_idle is
242+
reported if the metadata is externally managed.
243+
239244
clear
240245
No devices, no size, no level
241246
Writing is equivalent to STOP_ARRAY ioctl
@@ -292,6 +297,10 @@ Each directory contains:
292297
writemostly - device will only be subject to read
293298
requests if there are no other options.
294299
This applies only to raid1 arrays.
300+
blocked - device has failed, metadata is "external",
301+
and the failure hasn't been acknowledged yet.
302+
Writes that would write to this device if
303+
it were not faulty are blocked.
295304
spare - device is working, but not a full member.
296305
This includes spares that are in the process
297306
of being recovered to
@@ -301,6 +310,12 @@ Each directory contains:
301310
Writing "remove" removes the device from the array.
302311
Writing "writemostly" sets the writemostly flag.
303312
Writing "-writemostly" clears the writemostly flag.
313+
Writing "blocked" sets the "blocked" flag.
314+
Writing "-blocked" clears the "blocked" flag and allows writes
315+
to complete.
316+
317+
This file responds to select/poll. Any change to 'faulty'
318+
or 'blocked' causes an event.
304319

305320
errors
306321
An approximate count of read errors that have been detected on
@@ -332,7 +347,7 @@ Each directory contains:
332347
for storage of data. This will normally be the same as the
333348
component_size. This can be written while assembling an
334349
array. If a value less than the current component_size is
335-
written, component_size will be reduced to this value.
350+
written, it will be rejected.
336351

337352

338353
An active md device will also contain an entry for each active device
@@ -381,6 +396,19 @@ also have
381396
'check' and 'repair' will start the appropriate process
382397
providing the current state is 'idle'.
383398

399+
This file responds to select/poll. Any important change in the value
400+
triggers a poll event. Sometimes the value will briefly be
401+
"recover" if a recovery seems to be needed, but cannot be
402+
achieved. In that case, the transition to "recover" isn't
403+
notified, but the transition away is.
404+
405+
degraded
406+
This contains a count of the number of devices by which the
407+
array is degraded. So an optimal array will show '0'. A
408+
single failed/missing drive will show '1', etc.
409+
This file responds to select/poll; any increase or decrease
410+
in the count of missing devices will trigger an event.
411+
384412
mismatch_count
385413
When performing 'check' and 'repair', and possibly when
386414
performing 'resync', md will count the number of errors that are

drivers/md/bitmap.c

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
225225
|| test_bit(Faulty, &rdev->flags))
226226
continue;
227227

228-
target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
228+
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
229229

230230
if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
231231
page->index = index;
@@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
241241
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
242242
{
243243
mdk_rdev_t *rdev;
244-
struct list_head *tmp;
245244
mddev_t *mddev = bitmap->mddev;
246245

247-
rdev_for_each(rdev, tmp, mddev)
246+
rcu_read_lock();
247+
rdev_for_each_rcu(rdev, mddev)
248248
if (test_bit(In_sync, &rdev->flags)
249249
&& !test_bit(Faulty, &rdev->flags)) {
250250
int size = PAGE_SIZE;
@@ -260,32 +260,37 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
260260
+ (long)(page->index * (PAGE_SIZE/512))
261261
+ size/512 > 0)
262262
/* bitmap runs in to metadata */
263-
return -EINVAL;
263+
goto bad_alignment;
264264
if (rdev->data_offset + mddev->size*2
265-
> rdev->sb_offset*2 + bitmap->offset)
265+
> rdev->sb_start + bitmap->offset)
266266
/* data runs in to bitmap */
267-
return -EINVAL;
268-
} else if (rdev->sb_offset*2 < rdev->data_offset) {
267+
goto bad_alignment;
268+
} else if (rdev->sb_start < rdev->data_offset) {
269269
/* METADATA BITMAP DATA */
270-
if (rdev->sb_offset*2
270+
if (rdev->sb_start
271271
+ bitmap->offset
272272
+ page->index*(PAGE_SIZE/512) + size/512
273273
> rdev->data_offset)
274274
/* bitmap runs in to data */
275-
return -EINVAL;
275+
goto bad_alignment;
276276
} else {
277277
/* DATA METADATA BITMAP - no problems */
278278
}
279279
md_super_write(mddev, rdev,
280-
(rdev->sb_offset<<1) + bitmap->offset
280+
rdev->sb_start + bitmap->offset
281281
+ page->index * (PAGE_SIZE/512),
282282
size,
283283
page);
284284
}
285+
rcu_read_unlock();
285286

286287
if (wait)
287288
md_super_wait(mddev);
288289
return 0;
290+
291+
bad_alignment:
292+
rcu_read_unlock();
293+
return -EINVAL;
289294
}
290295

291296
static void bitmap_file_kick(struct bitmap *bitmap);
@@ -454,8 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
454459
spin_unlock_irqrestore(&bitmap->lock, flags);
455460
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
456461
sb->events = cpu_to_le64(bitmap->mddev->events);
457-
if (!bitmap->mddev->degraded)
458-
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
462+
if (bitmap->mddev->events < bitmap->events_cleared) {
463+
/* rolling back to read-only */
464+
bitmap->events_cleared = bitmap->mddev->events;
465+
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
466+
}
459467
kunmap_atomic(sb, KM_USER0);
460468
write_page(bitmap, bitmap->sb_page, 1);
461469
}
@@ -1085,9 +1093,19 @@ void bitmap_daemon_work(struct bitmap *bitmap)
10851093
} else
10861094
spin_unlock_irqrestore(&bitmap->lock, flags);
10871095
lastpage = page;
1088-
/*
1089-
printk("bitmap clean at page %lu\n", j);
1090-
*/
1096+
1097+
/* We are possibly going to clear some bits, so make
1098+
* sure that events_cleared is up-to-date.
1099+
*/
1100+
if (bitmap->need_sync) {
1101+
bitmap_super_t *sb;
1102+
bitmap->need_sync = 0;
1103+
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
1104+
sb->events_cleared =
1105+
cpu_to_le64(bitmap->events_cleared);
1106+
kunmap_atomic(sb, KM_USER0);
1107+
write_page(bitmap, bitmap->sb_page, 1);
1108+
}
10911109
spin_lock_irqsave(&bitmap->lock, flags);
10921110
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
10931111
}
@@ -1257,6 +1275,12 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
12571275
return;
12581276
}
12591277

1278+
if (success &&
1279+
bitmap->events_cleared < bitmap->mddev->events) {
1280+
bitmap->events_cleared = bitmap->mddev->events;
1281+
bitmap->need_sync = 1;
1282+
}
1283+
12601284
if (!success && ! (*bmc & NEEDED_MASK))
12611285
*bmc |= NEEDED_MASK;
12621286

drivers/md/faulty.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ static int run(mddev_t *mddev)
297297
rdev_for_each(rdev, tmp, mddev)
298298
conf->rdev = rdev;
299299

300-
mddev->array_size = mddev->size;
300+
mddev->array_sectors = mddev->size * 2;
301301
mddev->private = conf;
302302

303303
reconfig(mddev, mddev->layout, -1);

drivers/md/linear.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,13 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
122122
return NULL;
123123

124124
cnt = 0;
125-
conf->array_size = 0;
125+
conf->array_sectors = 0;
126126

127127
rdev_for_each(rdev, tmp, mddev) {
128128
int j = rdev->raid_disk;
129129
dev_info_t *disk = conf->disks + j;
130130

131-
if (j < 0 || j > raid_disks || disk->rdev) {
131+
if (j < 0 || j >= raid_disks || disk->rdev) {
132132
printk("linear: disk numbering problem. Aborting!\n");
133133
goto out;
134134
}
@@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
146146
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
147147

148148
disk->size = rdev->size;
149-
conf->array_size += rdev->size;
149+
conf->array_sectors += rdev->size * 2;
150150

151151
cnt++;
152152
}
@@ -155,7 +155,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
155155
goto out;
156156
}
157157

158-
min_spacing = conf->array_size;
158+
min_spacing = conf->array_sectors / 2;
159159
sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
160160

161161
/* min_spacing is the minimum spacing that will fit the hash
@@ -164,7 +164,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
164164
* that is larger than min_spacing as use the size of that as
165165
* the actual spacing
166166
*/
167-
conf->hash_spacing = conf->array_size;
167+
conf->hash_spacing = conf->array_sectors / 2;
168168
for (i=0; i < cnt-1 ; i++) {
169169
sector_t sz = 0;
170170
int j;
@@ -194,7 +194,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
194194
unsigned round;
195195
unsigned long base;
196196

197-
sz = conf->array_size >> conf->preshift;
197+
sz = conf->array_sectors >> (conf->preshift + 1);
198198
sz += 1; /* force round-up */
199199
base = conf->hash_spacing >> conf->preshift;
200200
round = sector_div(sz, base);
@@ -221,7 +221,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
221221
curr_offset = 0;
222222
i = 0;
223223
for (curr_offset = 0;
224-
curr_offset < conf->array_size;
224+
curr_offset < conf->array_sectors / 2;
225225
curr_offset += conf->hash_spacing) {
226226

227227
while (i < raid_disks-1 &&
@@ -258,7 +258,7 @@ static int linear_run (mddev_t *mddev)
258258
if (!conf)
259259
return 1;
260260
mddev->private = conf;
261-
mddev->array_size = conf->array_size;
261+
mddev->array_sectors = conf->array_sectors;
262262

263263
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
264264
mddev->queue->unplug_fn = linear_unplug;
@@ -292,8 +292,8 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
292292
newconf->prev = mddev_to_conf(mddev);
293293
mddev->private = newconf;
294294
mddev->raid_disks++;
295-
mddev->array_size = newconf->array_size;
296-
set_capacity(mddev->gendisk, mddev->array_size << 1);
295+
mddev->array_sectors = newconf->array_sectors;
296+
set_capacity(mddev->gendisk, mddev->array_sectors);
297297
return 0;
298298
}
299299

0 commit comments

Comments
 (0)