Skip to content

Commit 1635f6a

Browse files
Hugh Dickins authored and torvalds committed
tmpfs: undo fallocation on failure
In the previous episode, we left the already-fallocated pages attached to the file when shmem_fallocate() fails part way through. Now try to do better, by extending the earlier optimization of !Uptodate pages (then always under page lock) to !Uptodate pages (outside of page lock), representing fallocated pages. And don't waste time clearing them at the time of fallocate(), leave that until later if necessary. Adapt shmem_truncate_range() to shmem_undo_range(), so that a failing fallocate can recognize and remove precisely those !Uptodate allocations which it added (and were not independently allocated by racing tasks). But unless we start playing with swapfile.c and memcontrol.c too, once one of our fallocated pages reaches shmem_writepage(), we do then have to instantiate it as an ordinarily allocated page, before swapping out. This is unsatisfactory, but improved in the next episode. Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Cong Wang <amwang@redhat.com> Cc: Kay Sievers <kay@vrfy.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e2d12e2 commit 1635f6a

File tree

1 file changed

+72
-33
lines changed

1 file changed

+72
-33
lines changed

mm/shmem.c

Lines changed: 72 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ enum sgp_type {
8989
SGP_READ, /* don't exceed i_size, don't allocate page */
9090
SGP_CACHE, /* don't exceed i_size, may allocate page */
9191
SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
92-
SGP_WRITE, /* may exceed i_size, may allocate page */
92+
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
93+
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
9394
};
9495

9596
#ifdef CONFIG_TMPFS
@@ -427,8 +428,10 @@ void shmem_unlock_mapping(struct address_space *mapping)
427428

428429
/*
429430
* Remove range of pages and swap entries from radix tree, and free them.
431+
* If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
430432
*/
431-
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
433+
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
434+
bool unfalloc)
432435
{
433436
struct address_space *mapping = inode->i_mapping;
434437
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -462,16 +465,20 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
462465
break;
463466

464467
if (radix_tree_exceptional_entry(page)) {
468+
if (unfalloc)
469+
continue;
465470
nr_swaps_freed += !shmem_free_swap(mapping,
466471
index, page);
467472
continue;
468473
}
469474

470475
if (!trylock_page(page))
471476
continue;
472-
if (page->mapping == mapping) {
473-
VM_BUG_ON(PageWriteback(page));
474-
truncate_inode_page(mapping, page);
477+
if (!unfalloc || !PageUptodate(page)) {
478+
if (page->mapping == mapping) {
479+
VM_BUG_ON(PageWriteback(page));
480+
truncate_inode_page(mapping, page);
481+
}
475482
}
476483
unlock_page(page);
477484
}
@@ -517,12 +524,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
517524
min(end - index, (pgoff_t)PAGEVEC_SIZE),
518525
pvec.pages, indices);
519526
if (!pvec.nr) {
520-
if (index == start)
527+
if (index == start || unfalloc)
521528
break;
522529
index = start;
523530
continue;
524531
}
525-
if (index == start && indices[0] >= end) {
532+
if ((index == start || unfalloc) && indices[0] >= end) {
526533
shmem_deswap_pagevec(&pvec);
527534
pagevec_release(&pvec);
528535
break;
@@ -536,15 +543,19 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
536543
break;
537544

538545
if (radix_tree_exceptional_entry(page)) {
546+
if (unfalloc)
547+
continue;
539548
nr_swaps_freed += !shmem_free_swap(mapping,
540549
index, page);
541550
continue;
542551
}
543552

544553
lock_page(page);
545-
if (page->mapping == mapping) {
546-
VM_BUG_ON(PageWriteback(page));
547-
truncate_inode_page(mapping, page);
554+
if (!unfalloc || !PageUptodate(page)) {
555+
if (page->mapping == mapping) {
556+
VM_BUG_ON(PageWriteback(page));
557+
truncate_inode_page(mapping, page);
558+
}
548559
}
549560
unlock_page(page);
550561
}
@@ -558,7 +569,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
558569
info->swapped -= nr_swaps_freed;
559570
shmem_recalc_inode(inode);
560571
spin_unlock(&info->lock);
572+
}
561573

574+
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
575+
{
576+
shmem_undo_range(inode, lstart, lend, false);
562577
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
563578
}
564579
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -771,6 +786,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
771786
WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
772787
goto redirty;
773788
}
789+
790+
/*
791+
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
792+
* value into swapfile.c, the only way we can correctly account for a
793+
* fallocated page arriving here is now to initialize it and write it.
794+
*/
795+
if (!PageUptodate(page)) {
796+
clear_highpage(page);
797+
flush_dcache_page(page);
798+
SetPageUptodate(page);
799+
}
800+
774801
swap = get_swap_page();
775802
if (!swap.val)
776803
goto redirty;
@@ -994,6 +1021,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
9941021
swp_entry_t swap;
9951022
int error;
9961023
int once = 0;
1024+
int alloced = 0;
9971025

9981026
if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
9991027
return -EFBIG;
@@ -1005,19 +1033,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
10051033
page = NULL;
10061034
}
10071035

1008-
if (sgp != SGP_WRITE &&
1036+
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
10091037
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
10101038
error = -EINVAL;
10111039
goto failed;
10121040
}
10131041

1042+
/* fallocated page? */
1043+
if (page && !PageUptodate(page)) {
1044+
if (sgp != SGP_READ)
1045+
goto clear;
1046+
unlock_page(page);
1047+
page_cache_release(page);
1048+
page = NULL;
1049+
}
10141050
if (page || (sgp == SGP_READ && !swap.val)) {
1015-
/*
1016-
* Once we can get the page lock, it must be uptodate:
1017-
* if there were an error in reading back from swap,
1018-
* the page would not be inserted into the filecache.
1019-
*/
1020-
BUG_ON(page && !PageUptodate(page));
10211051
*pagep = page;
10221052
return 0;
10231053
}
@@ -1114,9 +1144,18 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
11141144
inode->i_blocks += BLOCKS_PER_PAGE;
11151145
shmem_recalc_inode(inode);
11161146
spin_unlock(&info->lock);
1147+
alloced = true;
11171148

11181149
/*
1119-
* Let SGP_WRITE caller clear ends if write does not fill page
1150+
* Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
1151+
*/
1152+
if (sgp == SGP_FALLOC)
1153+
sgp = SGP_WRITE;
1154+
clear:
1155+
/*
1156+
* Let SGP_WRITE caller clear ends if write does not fill page;
1157+
* but SGP_FALLOC on a page fallocated earlier must initialize
1158+
* it now, lest undo on failure cancel our earlier guarantee.
11201159
*/
11211160
if (sgp != SGP_WRITE) {
11221161
clear_highpage(page);
@@ -1128,10 +1167,13 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
11281167
}
11291168

11301169
/* Perhaps the file has been truncated since we checked */
1131-
if (sgp != SGP_WRITE &&
1170+
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
11321171
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
11331172
error = -EINVAL;
1134-
goto trunc;
1173+
if (alloced)
1174+
goto trunc;
1175+
else
1176+
goto failed;
11351177
}
11361178
*pagep = page;
11371179
return 0;
@@ -1140,13 +1182,15 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
11401182
* Error recovery.
11411183
*/
11421184
trunc:
1185+
info = SHMEM_I(inode);
11431186
ClearPageDirty(page);
11441187
delete_from_page_cache(page);
11451188
spin_lock(&info->lock);
11461189
info->alloced--;
11471190
inode->i_blocks -= BLOCKS_PER_PAGE;
11481191
spin_unlock(&info->lock);
11491192
decused:
1193+
sbinfo = SHMEM_SB(inode->i_sb);
11501194
if (sbinfo->max_blocks)
11511195
percpu_counter_add(&sbinfo->used_blocks, -1);
11521196
unacct:
@@ -1645,25 +1689,20 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
16451689
if (signal_pending(current))
16461690
error = -EINTR;
16471691
else
1648-
error = shmem_getpage(inode, index, &page, SGP_WRITE,
1692+
error = shmem_getpage(inode, index, &page, SGP_FALLOC,
16491693
NULL);
16501694
if (error) {
1651-
/*
1652-
* We really ought to free what we allocated so far,
1653-
* but it would be wrong to free pages allocated
1654-
* earlier, or already now in use: i_mutex does not
1655-
* exclude all cases. We do not know what to free.
1656-
*/
1695+
/* Remove the !PageUptodate pages we added */
1696+
shmem_undo_range(inode,
1697+
(loff_t)start << PAGE_CACHE_SHIFT,
1698+
(loff_t)index << PAGE_CACHE_SHIFT, true);
16571699
goto ctime;
16581700
}
16591701

1660-
if (!PageUptodate(page)) {
1661-
clear_highpage(page);
1662-
flush_dcache_page(page);
1663-
SetPageUptodate(page);
1664-
}
16651702
/*
1666-
* set_page_dirty so that memory pressure will swap rather
1703+
* If !PageUptodate, leave it that way so that freeable pages
1704+
* can be recognized if we need to rollback on error later.
1705+
* But set_page_dirty so that memory pressure will swap rather
16671706
* than free the pages we are allocating (and SGP_CACHE pages
16681707
* might still be clean: we now need to mark those dirty too).
16691708
*/

0 commit comments

Comments (0)