Skip to content

Commit e942f88

Browse files
author
Chris Mason
committed
Merge branch 'raid56-experimental' into for-linus-3.9
Signed-off-by: Chris Mason <chris.mason@fusionio.com> Conflicts: fs/btrfs/ctree.h fs/btrfs/extent-tree.c fs/btrfs/inode.c fs/btrfs/volumes.c
2 parents b2c6b3e + 0e4e026 commit e942f88

18 files changed

+2814
-120
lines changed

fs/btrfs/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ config BTRFS_FS
66
select ZLIB_DEFLATE
77
select LZO_COMPRESS
88
select LZO_DECOMPRESS
9+
select RAID6_PQ
10+
select XOR_BLOCKS
11+
912
help
1013
Btrfs is a new filesystem with extents, writable snapshotting,
1114
support for multiple devices and many more features.

fs/btrfs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
88
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
99
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
1010
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
11-
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o
11+
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
1212

1313
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
1414
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

fs/btrfs/compression.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
372372
page = compressed_pages[pg_index];
373373
page->mapping = inode->i_mapping;
374374
if (bio->bi_size)
375-
ret = io_tree->ops->merge_bio_hook(page, 0,
375+
ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
376376
PAGE_CACHE_SIZE,
377377
bio, 0);
378378
else
@@ -655,7 +655,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
655655
page->index = em_start >> PAGE_CACHE_SHIFT;
656656

657657
if (comp_bio->bi_size)
658-
ret = tree->ops->merge_bio_hook(page, 0,
658+
ret = tree->ops->merge_bio_hook(READ, page, 0,
659659
PAGE_CACHE_SIZE,
660660
comp_bio, 0);
661661
else

fs/btrfs/ctree.h

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ struct btrfs_super_block {
506506
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
507507

508508
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
509+
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
509510

510511
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
511512
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -515,6 +516,7 @@ struct btrfs_super_block {
515516
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
516517
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
517518
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
519+
BTRFS_FEATURE_INCOMPAT_RAID56 | \
518520
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
519521

520522
/*
@@ -956,6 +958,8 @@ struct btrfs_dev_replace_item {
956958
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
957959
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
958960
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
961+
#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) /* 64-bit constant, matching the other BTRFS_BLOCK_GROUP_* bits */
962+
#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) /* 64-bit constant, matching the other BTRFS_BLOCK_GROUP_* bits */
959963
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
960964

961965
enum btrfs_raid_types {
@@ -964,6 +968,8 @@ enum btrfs_raid_types {
964968
BTRFS_RAID_DUP,
965969
BTRFS_RAID_RAID0,
966970
BTRFS_RAID_SINGLE,
971+
BTRFS_RAID_RAID5,
972+
BTRFS_RAID_RAID6,
967973
BTRFS_NR_RAID_TYPES
968974
};
969975

@@ -973,6 +979,8 @@ enum btrfs_raid_types {
973979

974980
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
975981
BTRFS_BLOCK_GROUP_RAID1 | \
982+
BTRFS_BLOCK_GROUP_RAID5 | \
983+
BTRFS_BLOCK_GROUP_RAID6 | \
976984
BTRFS_BLOCK_GROUP_DUP | \
977985
BTRFS_BLOCK_GROUP_RAID10)
978986
/*
@@ -1197,6 +1205,10 @@ struct btrfs_block_group_cache {
11971205
u64 flags;
11981206
u64 sectorsize;
11991207
u64 cache_generation;
1208+
1209+
/* for raid56, this is a full stripe, without parity */
1210+
unsigned long full_stripe_len;
1211+
12001212
unsigned int ro:1;
12011213
unsigned int dirty:1;
12021214
unsigned int iref:1;
@@ -1242,6 +1254,23 @@ enum btrfs_orphan_cleanup_state {
12421254
ORPHAN_CLEANUP_DONE = 2,
12431255
};
12441256

1257+
/* used by the raid56 code to lock stripes for read/modify/write */
1258+
struct btrfs_stripe_hash {	/* element type of btrfs_stripe_hash_table.table[] */
1259+
struct list_head hash_list;	/* presumably the stripes hashed to this entry -- TODO confirm in raid56.c */
1260+
wait_queue_head_t wait;	/* NOTE(review): likely where contending stripe users wait -- confirm */
1261+
spinlock_t lock;	/* presumably guards hash_list -- TODO confirm in raid56.c */
1262+
};
1263+
1264+
/* used by the raid56 code to lock stripes for read/modify/write */
1265+
struct btrfs_stripe_hash_table {	/* pointed to by btrfs_fs_info.stripe_hash_table */
1266+
struct list_head stripe_cache;	/* NOTE(review): looks like a list of cached stripes -- confirm in raid56.c */
1267+
spinlock_t cache_lock;	/* presumably guards stripe_cache and cache_size -- TODO confirm */
1268+
int cache_size;	/* presumably the number of entries on stripe_cache -- TODO confirm */
1269+
struct btrfs_stripe_hash table[];	/* flexible array member; its length is set by the allocator, not visible here */
1270+
};
1271+
1272+
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
1273+
12451274
/* fs_info */
12461275
struct reloc_control;
12471276
struct btrfs_device;
@@ -1341,6 +1370,13 @@ struct btrfs_fs_info {
13411370
struct mutex cleaner_mutex;
13421371
struct mutex chunk_mutex;
13431372
struct mutex volume_mutex;
1373+
1374+
/* this is used during read/modify/write to make sure
1375+
* no two ios are trying to mod the same stripe at the same
1376+
* time
1377+
*/
1378+
struct btrfs_stripe_hash_table *stripe_hash_table;
1379+
13441380
/*
13451381
* this protects the ordered operations list only while we are
13461382
* processing all of the entries on it. This way we make
@@ -1423,6 +1459,8 @@ struct btrfs_fs_info {
14231459
struct btrfs_workers flush_workers;
14241460
struct btrfs_workers endio_workers;
14251461
struct btrfs_workers endio_meta_workers;
1462+
struct btrfs_workers endio_raid56_workers;
1463+
struct btrfs_workers rmw_workers;
14261464
struct btrfs_workers endio_meta_write_workers;
14271465
struct btrfs_workers endio_write_workers;
14281466
struct btrfs_workers endio_freespace_worker;
@@ -3490,9 +3528,9 @@ int btrfs_writepages(struct address_space *mapping,
34903528
struct writeback_control *wbc);
34913529
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
34923530
struct btrfs_root *new_root, u64 new_dirid);
3493-
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
3494-
size_t size, struct bio *bio, unsigned long bio_flags);
3495-
3531+
int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
3532+
size_t size, struct bio *bio,
3533+
unsigned long bio_flags);
34963534
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
34973535
int btrfs_readpage(struct file *file, struct page *page);
34983536
void btrfs_evict_inode(struct inode *inode);

fs/btrfs/delayed-ref.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
131131
/* total number of head nodes ready for processing */
132132
unsigned long num_heads_ready;
133133

134+
/*
135+
* bumped when someone is making progress on the delayed
136+
* refs, so that other procs know they are just adding to
137+
* contention instead of helping
138+
*/
139+
atomic_t procs_running_refs;
140+
atomic_t ref_seq;
141+
wait_queue_head_t wait;
142+
134143
/*
135144
* set when the tree is flushing before a transaction commit,
136145
* used by the throttling code to decide if new updates need

fs/btrfs/disk-io.c

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "check-integrity.h"
4747
#include "rcu-string.h"
4848
#include "dev-replace.h"
49+
#include "raid56.h"
4950

5051
#ifdef CONFIG_X86
5152
#include <asm/cpufeature.h>
@@ -640,8 +641,15 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
640641
btree_readahead_hook(root, eb, eb->start, ret);
641642
}
642643

643-
if (ret)
644+
if (ret) {
645+
/*
646+
* our io error hook is going to dec the io pages
647+
* again, we have to make sure it has something
648+
* to decrement
649+
*/
650+
atomic_inc(&eb->io_pages);
644651
clear_extent_buffer_uptodate(eb);
652+
}
645653
free_extent_buffer(eb);
646654
out:
647655
return ret;
@@ -655,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
655663
eb = (struct extent_buffer *)page->private;
656664
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
657665
eb->read_mirror = failed_mirror;
666+
atomic_dec(&eb->io_pages);
658667
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
659668
btree_readahead_hook(root, eb, eb->start, -EIO);
660669
return -EIO; /* we fixed nothing */
@@ -671,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
671680
end_io_wq->work.flags = 0;
672681

673682
if (bio->bi_rw & REQ_WRITE) {
674-
if (end_io_wq->metadata == 1)
683+
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
675684
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
676685
&end_io_wq->work);
677-
else if (end_io_wq->metadata == 2)
686+
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
678687
btrfs_queue_worker(&fs_info->endio_freespace_worker,
679688
&end_io_wq->work);
689+
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
690+
btrfs_queue_worker(&fs_info->endio_raid56_workers,
691+
&end_io_wq->work);
680692
else
681693
btrfs_queue_worker(&fs_info->endio_write_workers,
682694
&end_io_wq->work);
683695
} else {
684-
if (end_io_wq->metadata)
696+
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
697+
btrfs_queue_worker(&fs_info->endio_raid56_workers,
698+
&end_io_wq->work);
699+
else if (end_io_wq->metadata)
685700
btrfs_queue_worker(&fs_info->endio_meta_workers,
686701
&end_io_wq->work);
687702
else
@@ -696,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
696711
* 0 - if data
697712
* 1 - if normal metadata
698713
* 2 - if writing to the free space cache area
714+
* 3 - raid parity work
699715
*/
700716
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
701717
int metadata)
@@ -2179,6 +2195,12 @@ int open_ctree(struct super_block *sb,
21792195
init_waitqueue_head(&fs_info->transaction_blocked_wait);
21802196
init_waitqueue_head(&fs_info->async_submit_wait);
21812197

2198+
ret = btrfs_alloc_stripe_hash_table(fs_info);
2199+
if (ret) {
2200+
err = -ENOMEM;
2201+
goto fail_alloc;
2202+
}
2203+
21822204
__setup_root(4096, 4096, 4096, 4096, tree_root,
21832205
fs_info, BTRFS_ROOT_TREE_OBJECTID);
21842206

@@ -2349,6 +2371,12 @@ int open_ctree(struct super_block *sb,
23492371
btrfs_init_workers(&fs_info->endio_meta_write_workers,
23502372
"endio-meta-write", fs_info->thread_pool_size,
23512373
&fs_info->generic_worker);
2374+
btrfs_init_workers(&fs_info->endio_raid56_workers,
2375+
"endio-raid56", fs_info->thread_pool_size,
2376+
&fs_info->generic_worker);
2377+
btrfs_init_workers(&fs_info->rmw_workers,
2378+
"rmw", fs_info->thread_pool_size,
2379+
&fs_info->generic_worker);
23522380
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
23532381
fs_info->thread_pool_size,
23542382
&fs_info->generic_worker);
@@ -2367,6 +2395,8 @@ int open_ctree(struct super_block *sb,
23672395
*/
23682396
fs_info->endio_workers.idle_thresh = 4;
23692397
fs_info->endio_meta_workers.idle_thresh = 4;
2398+
fs_info->endio_raid56_workers.idle_thresh = 4;
2399+
fs_info->rmw_workers.idle_thresh = 2;
23702400

23712401
fs_info->endio_write_workers.idle_thresh = 2;
23722402
fs_info->endio_meta_write_workers.idle_thresh = 2;
@@ -2383,6 +2413,8 @@ int open_ctree(struct super_block *sb,
23832413
ret |= btrfs_start_workers(&fs_info->fixup_workers);
23842414
ret |= btrfs_start_workers(&fs_info->endio_workers);
23852415
ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2416+
ret |= btrfs_start_workers(&fs_info->rmw_workers);
2417+
ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
23862418
ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
23872419
ret |= btrfs_start_workers(&fs_info->endio_write_workers);
23882420
ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
@@ -2726,6 +2758,8 @@ int open_ctree(struct super_block *sb,
27262758
btrfs_stop_workers(&fs_info->workers);
27272759
btrfs_stop_workers(&fs_info->endio_workers);
27282760
btrfs_stop_workers(&fs_info->endio_meta_workers);
2761+
btrfs_stop_workers(&fs_info->endio_raid56_workers);
2762+
btrfs_stop_workers(&fs_info->rmw_workers);
27292763
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
27302764
btrfs_stop_workers(&fs_info->endio_write_workers);
27312765
btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -2747,6 +2781,7 @@ int open_ctree(struct super_block *sb,
27472781
fail_srcu:
27482782
cleanup_srcu_struct(&fs_info->subvol_srcu);
27492783
fail:
2784+
btrfs_free_stripe_hash_table(fs_info);
27502785
btrfs_close_devices(fs_info->fs_devices);
27512786
return err;
27522787

@@ -3094,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
30943129
((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
30953130
== 0)))
30963131
num_tolerated_disk_barrier_failures = 0;
3097-
else if (num_tolerated_disk_barrier_failures > 1
3098-
&&
3099-
(flags & (BTRFS_BLOCK_GROUP_RAID1 |
3100-
BTRFS_BLOCK_GROUP_RAID10)))
3101-
num_tolerated_disk_barrier_failures = 1;
3132+
else if (num_tolerated_disk_barrier_failures > 1) {
3133+
if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3134+
BTRFS_BLOCK_GROUP_RAID5 |
3135+
BTRFS_BLOCK_GROUP_RAID10)) {
3136+
num_tolerated_disk_barrier_failures = 1;
3137+
} else if (flags &
3138+
BTRFS_BLOCK_GROUP_RAID6) {	/* was a second RAID5 test, unreachable because the branch above already matches RAID5 */
3139+
num_tolerated_disk_barrier_failures = 2;
3140+
}
3141+
}
31023142
}
31033143
}
31043144
up_read(&sinfo->groups_sem);
@@ -3402,6 +3442,8 @@ int close_ctree(struct btrfs_root *root)
34023442
btrfs_stop_workers(&fs_info->workers);
34033443
btrfs_stop_workers(&fs_info->endio_workers);
34043444
btrfs_stop_workers(&fs_info->endio_meta_workers);
3445+
btrfs_stop_workers(&fs_info->endio_raid56_workers);
3446+
btrfs_stop_workers(&fs_info->rmw_workers);
34053447
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
34063448
btrfs_stop_workers(&fs_info->endio_write_workers);
34073449
btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -3424,6 +3466,8 @@ int close_ctree(struct btrfs_root *root)
34243466
bdi_destroy(&fs_info->bdi);
34253467
cleanup_srcu_struct(&fs_info->subvol_srcu);
34263468

3469+
btrfs_free_stripe_hash_table(fs_info);
3470+
34273471
return 0;
34283472
}
34293473

fs/btrfs/disk-io.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@
2525
#define BTRFS_SUPER_MIRROR_MAX 3
2626
#define BTRFS_SUPER_MIRROR_SHIFT 12
2727

28+
enum {	/* end-io work classes; end_workqueue_bio() compares end_io_wq->metadata against these to pick a worker pool */
29+
BTRFS_WQ_ENDIO_DATA = 0,	/* data */
30+
BTRFS_WQ_ENDIO_METADATA = 1,	/* normal metadata */
31+
BTRFS_WQ_ENDIO_FREE_SPACE = 2,	/* writing to the free space cache area */
32+
BTRFS_WQ_ENDIO_RAID56 = 3,	/* raid parity work; queued on endio_raid56_workers for both reads and writes */
33+
};
34+
2835
static inline u64 btrfs_sb_offset(int mirror)
2936
{
3037
u64 start = 16 * 1024;

0 commit comments

Comments
 (0)