Skip to content

Commit 23b5ec7

Browse files
Josef Bacik authored and kdave committed
btrfs: fix readdir deadlock with pagefault
Readdir does dir_emit while under the btree lock. dir_emit can trigger a page fault, which means we can deadlock. Fix this by allocating a buffer on opening a directory, copying the readdir results into this buffer, and doing dir_emit from outside of the tree lock.

Thread A
readdir <holding tree lock>
  dir_emit
    <page fault>
      down_read(mmap_sem)

Thread B
mmap write
  down_write(mmap_sem)
    page_mkwrite
      wait_ordered_extents

Process C
finish_ordered_extent
  insert_reserved_file_extent
    try to lock leaf <hang>

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ copy the deadlock scenario to changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 8d8aafe commit 23b5ec7

File tree

4 files changed

+110
-34
lines changed

4 files changed

+110
-34
lines changed

fs/btrfs/ctree.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,11 @@ struct btrfs_root {
12641264
atomic64_t qgroup_meta_rsv;
12651265
};
12661266

1267+
struct btrfs_file_private { /* per-open-file state stored in file->private_data */
1268+
struct btrfs_trans_handle *trans; /* set by btrfs_ioctl_trans_start(), reset to NULL by btrfs_ioctl_trans_end() */
1269+
void *filldir_buf; /* PAGE_SIZE staging buffer for readdir, allocated in btrfs_opendir() */
1270+
};
1271+
12671272
static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
12681273
{
12691274
return btrfs_sb(inode->i_sb)->sectorsize;

fs/btrfs/file.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1990,8 +1990,15 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
19901990

19911991
int btrfs_release_file(struct inode *inode, struct file *filp)
19921992
{
1993-
if (filp->private_data)
1993+
struct btrfs_file_private *private = filp->private_data;
1994+
1995+
if (private && private->trans)
19941996
btrfs_ioctl_trans_end(filp);
1997+
if (private && private->filldir_buf)
1998+
kfree(private->filldir_buf);
1999+
kfree(private);
2000+
filp->private_data = NULL;
2001+
19952002
/*
19962003
* ordered_data_close is set by setattr when we are about to truncate
19972004
* a file from a non-zero size to a zero size. This tries to

fs/btrfs/inode.c

Lines changed: 82 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5876,25 +5876,74 @@ unsigned char btrfs_filetype_table[] = {
58765876
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
58775877
};
58785878

5879+
/*
5880+
* All this infrastructure exists because dir_emit can fault, and we are holding
5881+
* the tree lock when doing readdir. For now just allocate a buffer and copy
5882+
* our information into that, and then dir_emit from the buffer. This is
5883+
* similar to what NFS does, only we don't keep the buffer around in pagecache
5884+
* because I'm afraid I'll mess that up. Long term we need to make filldir do
5885+
* copy_to_user_inatomic so we don't have to worry about page faulting under the
5886+
* tree lock.
5887+
*/
5888+
static int btrfs_opendir(struct inode *inode, struct file *file)
5889+
{
5890+
struct btrfs_file_private *private;
5891+
5892+
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5893+
if (!private)
5894+
return -ENOMEM;
5895+
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5896+
if (!private->filldir_buf) {
5897+
kfree(private);
5898+
return -ENOMEM;
5899+
}
5900+
file->private_data = private;
5901+
return 0;
5902+
}
5903+
5904+
struct dir_entry { /* header of one buffered readdir record; the name bytes follow it directly */
5905+
u64 ino; /* inode number handed to dir_emit() */
5906+
u64 offset; /* dir index key offset; becomes ctx->pos when the entry is emitted */
5907+
unsigned type; /* DT_* value looked up in btrfs_filetype_table */
5908+
int name_len; /* number of name bytes stored right after this header */
5909+
};
5910+
5911+
static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5912+
{
5913+
while (entries--) {
5914+
struct dir_entry *entry = addr;
5915+
char *name = (char *)(entry + 1);
5916+
5917+
ctx->pos = entry->offset;
5918+
if (!dir_emit(ctx, name, entry->name_len, entry->ino,
5919+
entry->type))
5920+
return 1;
5921+
addr += sizeof(struct dir_entry) + entry->name_len;
5922+
ctx->pos++;
5923+
}
5924+
return 0;
5925+
}
5926+
58795927
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
58805928
{
58815929
struct inode *inode = file_inode(file);
58825930
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
58835931
struct btrfs_root *root = BTRFS_I(inode)->root;
5932+
struct btrfs_file_private *private = file->private_data;
58845933
struct btrfs_dir_item *di;
58855934
struct btrfs_key key;
58865935
struct btrfs_key found_key;
58875936
struct btrfs_path *path;
5937+
void *addr;
58885938
struct list_head ins_list;
58895939
struct list_head del_list;
58905940
int ret;
58915941
struct extent_buffer *leaf;
58925942
int slot;
5893-
unsigned char d_type;
5894-
int over = 0;
5895-
char tmp_name[32];
58965943
char *name_ptr;
58975944
int name_len;
5945+
int entries = 0;
5946+
int total_len = 0;
58985947
bool put = false;
58995948
struct btrfs_key location;
59005949

@@ -5905,12 +5954,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
59055954
if (!path)
59065955
return -ENOMEM;
59075956

5957+
addr = private->filldir_buf;
59085958
path->reada = READA_FORWARD;
59095959

59105960
INIT_LIST_HEAD(&ins_list);
59115961
INIT_LIST_HEAD(&del_list);
59125962
put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
59135963

5964+
again:
59145965
key.type = BTRFS_DIR_INDEX_KEY;
59155966
key.offset = ctx->pos;
59165967
key.objectid = btrfs_ino(BTRFS_I(inode));
@@ -5920,6 +5971,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
59205971
goto err;
59215972

59225973
while (1) {
5974+
struct dir_entry *entry;
5975+
59235976
leaf = path->nodes[0];
59245977
slot = path->slots[0];
59255978
if (slot >= btrfs_header_nritems(leaf)) {
@@ -5941,41 +5994,43 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
59415994
goto next;
59425995
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
59435996
goto next;
5944-
5945-
ctx->pos = found_key.offset;
5946-
59475997
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
59485998
if (verify_dir_item(fs_info, leaf, slot, di))
59495999
goto next;
59506000

59516001
name_len = btrfs_dir_name_len(leaf, di);
5952-
if (name_len <= sizeof(tmp_name)) {
5953-
name_ptr = tmp_name;
5954-
} else {
5955-
name_ptr = kmalloc(name_len, GFP_KERNEL);
5956-
if (!name_ptr) {
5957-
ret = -ENOMEM;
5958-
goto err;
5959-
}
6002+
if ((total_len + sizeof(struct dir_entry) + name_len) >=
6003+
PAGE_SIZE) {
6004+
btrfs_release_path(path);
6005+
ret = btrfs_filldir(private->filldir_buf, entries, ctx);
6006+
if (ret)
6007+
goto nopos;
6008+
addr = private->filldir_buf;
6009+
entries = 0;
6010+
total_len = 0;
6011+
goto again;
59606012
}
6013+
6014+
entry = addr;
6015+
entry->name_len = name_len;
6016+
name_ptr = (char *)(entry + 1);
59616017
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
59626018
name_len);
5963-
5964-
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
6019+
entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
59656020
btrfs_dir_item_key_to_cpu(leaf, di, &location);
5966-
5967-
over = !dir_emit(ctx, name_ptr, name_len, location.objectid,
5968-
d_type);
5969-
5970-
if (name_ptr != tmp_name)
5971-
kfree(name_ptr);
5972-
5973-
if (over)
5974-
goto nopos;
5975-
ctx->pos++;
6021+
entry->ino = location.objectid;
6022+
entry->offset = found_key.offset;
6023+
entries++;
6024+
addr += sizeof(struct dir_entry) + name_len;
6025+
total_len += sizeof(struct dir_entry) + name_len;
59766026
next:
59776027
path->slots[0]++;
59786028
}
6029+
btrfs_release_path(path);
6030+
6031+
ret = btrfs_filldir(private->filldir_buf, entries, ctx);
6032+
if (ret)
6033+
goto nopos;
59796034

59806035
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
59816036
if (ret)
@@ -10779,6 +10834,7 @@ static const struct file_operations btrfs_dir_file_operations = {
1077910834
.llseek = generic_file_llseek,
1078010835
.read = generic_read_dir,
1078110836
.iterate_shared = btrfs_real_readdir,
10837+
.open = btrfs_opendir,
1078210838
.unlocked_ioctl = btrfs_ioctl,
1078310839
#ifdef CONFIG_COMPAT
1078410840
.compat_ioctl = btrfs_compat_ioctl,

fs/btrfs/ioctl.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3966,6 +3966,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
39663966
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
39673967
struct btrfs_root *root = BTRFS_I(inode)->root;
39683968
struct btrfs_trans_handle *trans;
3969+
struct btrfs_file_private *private;
39693970
int ret;
39703971
static bool warned = false;
39713972

@@ -3984,8 +3985,16 @@ static long btrfs_ioctl_trans_start(struct file *file)
39843985
}
39853986

39863987
ret = -EINPROGRESS;
3987-
if (file->private_data)
3988+
private = file->private_data;
3989+
if (private && private->trans)
39883990
goto out;
3991+
if (!private) {
3992+
private = kzalloc(sizeof(struct btrfs_file_private),
3993+
GFP_KERNEL);
3994+
if (!private)
3995+
return -ENOMEM;
3996+
file->private_data = private;
3997+
}
39893998

39903999
ret = -EROFS;
39914000
if (btrfs_root_readonly(root))
@@ -4002,7 +4011,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
40024011
if (IS_ERR(trans))
40034012
goto out_drop;
40044013

4005-
file->private_data = trans;
4014+
private->trans = trans;
40064015
return 0;
40074016

40084017
out_drop:
@@ -4257,14 +4266,13 @@ long btrfs_ioctl_trans_end(struct file *file)
42574266
{
42584267
struct inode *inode = file_inode(file);
42594268
struct btrfs_root *root = BTRFS_I(inode)->root;
4260-
struct btrfs_trans_handle *trans;
4269+
struct btrfs_file_private *private = file->private_data;
42614270

4262-
trans = file->private_data;
4263-
if (!trans)
4271+
if (!private || !private->trans)
42644272
return -EINVAL;
4265-
file->private_data = NULL;
42664273

4267-
btrfs_end_transaction(trans);
4274+
btrfs_end_transaction(private->trans);
4275+
private->trans = NULL;
42684276

42694277
atomic_dec(&root->fs_info->open_ioctl_trans);
42704278

0 commit comments

Comments
 (0)