Skip to content

Commit 6f8d445

Browse files
author
Jaegeuk Kim
committed
f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
f2fs_gc(), when called by f2fs_balance_fs(), must be called outside of fi->i_gc_rwsem[WRITE], since f2fs_gc() can try to grab it in a loop. If it hits the maximum number of retries in GC, let's give a chance to release gc_mutex for a short time in order not to go into livelock in the worst case. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
1 parent 853137c commit 6f8d445

File tree

6 files changed

+91
-67
lines changed

6 files changed

+91
-67
lines changed

fs/f2fs/data.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,14 +2217,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
22172217
loff_t i_size = i_size_read(inode);
22182218

22192219
if (to > i_size) {
2220-
down_write(&F2FS_I(inode)->i_mmap_sem);
22212220
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
2221+
down_write(&F2FS_I(inode)->i_mmap_sem);
22222222

22232223
truncate_pagecache(inode, i_size);
22242224
f2fs_truncate_blocks(inode, i_size, true);
22252225

2226-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
22272226
up_write(&F2FS_I(inode)->i_mmap_sem);
2227+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
22282228
}
22292229
}
22302230

fs/f2fs/f2fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,7 @@ struct f2fs_sb_info {
12431243
unsigned int gc_mode; /* current GC state */
12441244
/* for skip statistic */
12451245
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
1246+
unsigned long long skipped_gc_rwsem; /* FG_GC only */
12461247

12471248
/* threshold for gc trials on pinned files */
12481249
u64 gc_pin_file_threshold;

fs/f2fs/file.c

Lines changed: 62 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -797,8 +797,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
797797
if (attr->ia_valid & ATTR_SIZE) {
798798
bool to_smaller = (attr->ia_size <= i_size_read(inode));
799799

800-
down_write(&F2FS_I(inode)->i_mmap_sem);
801800
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
801+
down_write(&F2FS_I(inode)->i_mmap_sem);
802802

803803
truncate_setsize(inode, attr->ia_size);
804804

@@ -808,8 +808,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
808808
* do not trim all blocks after i_size if target size is
809809
* larger than i_size.
810810
*/
811-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
812811
up_write(&F2FS_I(inode)->i_mmap_sem);
812+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
813813

814814
if (err)
815815
return err;
@@ -962,8 +962,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
962962
blk_start = (loff_t)pg_start << PAGE_SHIFT;
963963
blk_end = (loff_t)pg_end << PAGE_SHIFT;
964964

965-
down_write(&F2FS_I(inode)->i_mmap_sem);
966965
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
966+
down_write(&F2FS_I(inode)->i_mmap_sem);
967967

968968
truncate_inode_pages_range(mapping, blk_start,
969969
blk_end - 1);
@@ -972,8 +972,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
972972
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
973973
f2fs_unlock_op(sbi);
974974

975-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
976975
up_write(&F2FS_I(inode)->i_mmap_sem);
976+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
977977
}
978978
}
979979

@@ -1188,25 +1188,33 @@ static int __exchange_data_block(struct inode *src_inode,
11881188
return ret;
11891189
}
11901190

1191-
static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
1191+
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
11921192
{
11931193
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
11941194
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
1195+
pgoff_t start = offset >> PAGE_SHIFT;
1196+
pgoff_t end = (offset + len) >> PAGE_SHIFT;
11951197
int ret;
11961198

11971199
f2fs_balance_fs(sbi, true);
1198-
f2fs_lock_op(sbi);
11991200

1200-
f2fs_drop_extent_tree(inode);
1201+
/* avoid gc operation during block exchange */
1202+
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1203+
down_write(&F2FS_I(inode)->i_mmap_sem);
12011204

1205+
f2fs_lock_op(sbi);
1206+
f2fs_drop_extent_tree(inode);
1207+
truncate_pagecache(inode, offset);
12021208
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
12031209
f2fs_unlock_op(sbi);
1210+
1211+
up_write(&F2FS_I(inode)->i_mmap_sem);
1212+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
12041213
return ret;
12051214
}
12061215

12071216
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
12081217
{
1209-
pgoff_t pg_start, pg_end;
12101218
loff_t new_size;
12111219
int ret;
12121220

@@ -1221,37 +1229,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
12211229
if (ret)
12221230
return ret;
12231231

1224-
pg_start = offset >> PAGE_SHIFT;
1225-
pg_end = (offset + len) >> PAGE_SHIFT;
1226-
1227-
/* avoid gc operation during block exchange */
1228-
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1229-
1230-
down_write(&F2FS_I(inode)->i_mmap_sem);
12311232
/* write out all dirty pages from offset */
12321233
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
12331234
if (ret)
1234-
goto out_unlock;
1235-
1236-
truncate_pagecache(inode, offset);
1235+
return ret;
12371236

1238-
ret = f2fs_do_collapse(inode, pg_start, pg_end);
1237+
ret = f2fs_do_collapse(inode, offset, len);
12391238
if (ret)
1240-
goto out_unlock;
1239+
return ret;
12411240

12421241
/* write out all moved pages, if possible */
1242+
down_write(&F2FS_I(inode)->i_mmap_sem);
12431243
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
12441244
truncate_pagecache(inode, offset);
12451245

12461246
new_size = i_size_read(inode) - len;
12471247
truncate_pagecache(inode, new_size);
12481248

12491249
ret = f2fs_truncate_blocks(inode, new_size, true);
1250+
up_write(&F2FS_I(inode)->i_mmap_sem);
12501251
if (!ret)
12511252
f2fs_i_size_write(inode, new_size);
1252-
out_unlock:
1253-
up_write(&F2FS_I(inode)->i_mmap_sem);
1254-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
12551253
return ret;
12561254
}
12571255

@@ -1317,10 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
13171315
if (ret)
13181316
return ret;
13191317

1320-
down_write(&F2FS_I(inode)->i_mmap_sem);
13211318
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
13221319
if (ret)
1323-
goto out_sem;
1320+
return ret;
13241321

13251322
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
13261323
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1332,15 +1329,15 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
13321329
ret = fill_zero(inode, pg_start, off_start,
13331330
off_end - off_start);
13341331
if (ret)
1335-
goto out_sem;
1332+
return ret;
13361333

13371334
new_size = max_t(loff_t, new_size, offset + len);
13381335
} else {
13391336
if (off_start) {
13401337
ret = fill_zero(inode, pg_start++, off_start,
13411338
PAGE_SIZE - off_start);
13421339
if (ret)
1343-
goto out_sem;
1340+
return ret;
13441341

13451342
new_size = max_t(loff_t, new_size,
13461343
(loff_t)pg_start << PAGE_SHIFT);
@@ -1352,6 +1349,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
13521349
pgoff_t end;
13531350

13541351
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1352+
down_write(&F2FS_I(inode)->i_mmap_sem);
13551353

13561354
truncate_pagecache_range(inode,
13571355
(loff_t)index << PAGE_SHIFT,
@@ -1363,6 +1361,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
13631361
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
13641362
if (ret) {
13651363
f2fs_unlock_op(sbi);
1364+
up_write(&F2FS_I(inode)->i_mmap_sem);
13661365
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
13671366
goto out;
13681367
}
@@ -1374,6 +1373,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
13741373
f2fs_put_dnode(&dn);
13751374

13761375
f2fs_unlock_op(sbi);
1376+
up_write(&F2FS_I(inode)->i_mmap_sem);
13771377
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
13781378

13791379
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1402,9 +1402,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
14021402
else
14031403
f2fs_i_size_write(inode, new_size);
14041404
}
1405-
out_sem:
1406-
up_write(&F2FS_I(inode)->i_mmap_sem);
1407-
14081405
return ret;
14091406
}
14101407

@@ -1433,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
14331430

14341431
f2fs_balance_fs(sbi, true);
14351432

1436-
/* avoid gc operation during block exchange */
1437-
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1438-
14391433
down_write(&F2FS_I(inode)->i_mmap_sem);
14401434
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
1435+
up_write(&F2FS_I(inode)->i_mmap_sem);
14411436
if (ret)
1442-
goto out;
1437+
return ret;
14431438

14441439
/* write out all dirty pages from offset */
14451440
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
14461441
if (ret)
1447-
goto out;
1448-
1449-
truncate_pagecache(inode, offset);
1442+
return ret;
14501443

14511444
pg_start = offset >> PAGE_SHIFT;
14521445
pg_end = (offset + len) >> PAGE_SHIFT;
14531446
delta = pg_end - pg_start;
14541447
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
14551448

1449+
/* avoid gc operation during block exchange */
1450+
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1451+
down_write(&F2FS_I(inode)->i_mmap_sem);
1452+
truncate_pagecache(inode, offset);
1453+
14561454
while (!ret && idx > pg_start) {
14571455
nr = idx - pg_start;
14581456
if (nr > delta)
@@ -1466,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
14661464
idx + delta, nr, false);
14671465
f2fs_unlock_op(sbi);
14681466
}
1467+
up_write(&F2FS_I(inode)->i_mmap_sem);
1468+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
14691469

14701470
/* write out all moved pages, if possible */
1471+
down_write(&F2FS_I(inode)->i_mmap_sem);
14711472
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
14721473
truncate_pagecache(inode, offset);
1474+
up_write(&F2FS_I(inode)->i_mmap_sem);
14731475

14741476
if (!ret)
14751477
f2fs_i_size_write(inode, new_size);
1476-
out:
1477-
up_write(&F2FS_I(inode)->i_mmap_sem);
1478-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
14791478
return ret;
14801479
}
14811480

@@ -1722,8 +1721,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
17221721

17231722
inode_lock(inode);
17241723

1725-
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1726-
17271724
if (f2fs_is_atomic_file(inode)) {
17281725
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
17291726
ret = -EINVAL;
@@ -1734,25 +1731,29 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
17341731
if (ret)
17351732
goto out;
17361733

1734+
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1735+
17371736
if (!get_dirty_pages(inode))
17381737
goto skip_flush;
17391738

17401739
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
17411740
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
17421741
inode->i_ino, get_dirty_pages(inode));
17431742
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
1744-
if (ret)
1743+
if (ret) {
1744+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
17451745
goto out;
1746+
}
17461747
skip_flush:
17471748
set_inode_flag(inode, FI_ATOMIC_FILE);
17481749
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
1749-
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1750+
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
17501751

1752+
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
17511753
F2FS_I(inode)->inmem_task = current;
17521754
stat_inc_atomic_write(inode);
17531755
stat_update_max_atomic_write(inode);
17541756
out:
1755-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
17561757
inode_unlock(inode);
17571758
mnt_drop_write_file(filp);
17581759
return ret;
@@ -1770,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
17701771
if (ret)
17711772
return ret;
17721773

1773-
inode_lock(inode);
1774+
f2fs_balance_fs(F2FS_I_SB(inode), true);
17741775

1775-
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1776+
inode_lock(inode);
17761777

17771778
if (f2fs_is_volatile_file(inode)) {
17781779
ret = -EINVAL;
@@ -1798,7 +1799,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
17981799
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
17991800
ret = -EINVAL;
18001801
}
1801-
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
18021802
inode_unlock(inode);
18031803
mnt_drop_write_file(filp);
18041804
return ret;
@@ -2394,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
23942394
}
23952395

23962396
inode_lock(src);
2397-
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
23982397
if (src != dst) {
23992398
ret = -EBUSY;
24002399
if (!inode_trylock(dst))
24012400
goto out;
2402-
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
2403-
inode_unlock(dst);
2404-
goto out;
2405-
}
24062401
}
24072402

24082403
ret = -EINVAL;
@@ -2447,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
24472442
goto out_unlock;
24482443

24492444
f2fs_balance_fs(sbi, true);
2445+
2446+
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
2447+
if (src != dst) {
2448+
ret = -EBUSY;
2449+
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
2450+
goto out_src;
2451+
}
2452+
24502453
f2fs_lock_op(sbi);
24512454
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
24522455
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2459,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
24592462
f2fs_i_size_write(dst, dst_osize);
24602463
}
24612464
f2fs_unlock_op(sbi);
2462-
out_unlock:
2463-
if (src != dst) {
2465+
2466+
if (src != dst)
24642467
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
2468+
out_src:
2469+
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
2470+
out_unlock:
2471+
if (src != dst)
24652472
inode_unlock(dst);
2466-
}
24672473
out:
2468-
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
24692474
inode_unlock(src);
24702475
return ret;
24712476
}

0 commit comments

Comments
 (0)