
Commit bde05d1

Hugh Dickins authored and torvalds committed
shmem: replace page if mapping excludes its zone
The GMA500 GPU driver uses GEM shmem objects, but with a new twist: the backing RAM has to be below 4GB. Not a problem while the boards supported only 4GB: but now Intel's D2700MUD boards support 8GB, and their GMA3600 is managed by the GMA500 driver.

shmem/tmpfs has never pretended to support hardware restrictions on the backing memory, but it might have appeared to do so before v3.1, and even now it works fine until a page is swapped out then back in. When read_cache_page_gfp() supplied a freshly allocated page for copy, that compensated for whatever choice might have been made by earlier swapin readahead; but swapoff was likely to destroy the illusion.

We'd like to continue to support GMA500, so now add a new shmem_should_replace_page() check on the zone when about to move a page from swapcache to filecache (in swapin and swapoff cases), with shmem_replace_page() to allocate and substitute a suitable page (given gma500/gem.c's mapping_set_gfp_mask GFP_KERNEL | __GFP_DMA32).

This does involve a minor extension to mem_cgroup_replace_page_cache() (the page may or may not have already been charged); and I've removed a comment and call to mem_cgroup_uncharge_cache_page(), which in fact is always a no-op while PageSwapCache.

Also removed optimization of an unlikely path in shmem_getpage_gfp(), now that we need to check PageSwapCache more carefully (a racing caller might already have made the copy). And at one point shmem_unuse_inode() needs to use the hitherto private page_swapcount(), to guard against racing with inode eviction.

It would make sense to extend shmem_should_replace_page(), to cover cpuset and NUMA mempolicy restrictions too, but set that aside for now: needs a cleanup of shmem mempolicy handling, and more testing, and ought to handle swap faults in do_swap_page() as well as shmem.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Stephane Marchesin <marcheu@chromium.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
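For context, the zone restriction at issue is set by the driver on the shmem file's address_space, and is only honoured by fresh allocations. A minimal sketch of that setup follows; mapping_set_gfp_mask() and the GFP_KERNEL | __GFP_DMA32 mask come from the commit message's reference to gma500/gem.c, while the helper name and its struct file parameter are illustrative assumptions, not the driver's actual code:

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>

/*
 * Illustrative sketch only: mark a shmem-backed file so that pages
 * allocated for its mapping come from memory addressable below 4GB,
 * the way gma500/gem.c does for its GEM objects.  The function name
 * and the struct file parameter are assumptions for this example.
 */
static void example_limit_shmem_to_dma32(struct file *filp)
{
	/* Fresh pagecache allocations for this mapping honour __GFP_DMA32. */
	mapping_set_gfp_mask(filp->f_mapping, GFP_KERNEL | __GFP_DMA32);
}

The mask constrains only allocations made for this mapping: a page that was swapped out and later read back by swapin readahead may land in any zone, which is the gap shmem_should_replace_page() and shmem_replace_page() below close by copying such a page into an acceptable zone before it re-enters the filecache.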
1 parent 5ceb9ce commit bde05d1

4 files changed: 142 additions & 24 deletions

include/linux/swap.h

Lines changed: 6 additions & 0 deletions
@@ -351,6 +351,7 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
+extern int page_swapcount(struct page *);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
@@ -445,6 +445,11 @@ static inline void delete_from_swap_cache(struct page *page)
 {
 }
 
+static inline int page_swapcount(struct page *page)
+{
+	return 0;
+}
+
 #define reuse_swap_page(page)	(page_mapcount(page) == 1)
 
 static inline int try_to_free_swap(struct page *page)

mm/memcontrol.c

Lines changed: 13 additions & 4 deletions
@@ -3373,7 +3373,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 void mem_cgroup_replace_page_cache(struct page *oldpage,
 				  struct page *newpage)
 {
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
 
@@ -3383,11 +3383,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	pc = lookup_page_cgroup(oldpage);
 	/* fix accounting on old pages */
 	lock_page_cgroup(pc);
-	memcg = pc->mem_cgroup;
-	mem_cgroup_charge_statistics(memcg, false, -1);
-	ClearPageCgroupUsed(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		mem_cgroup_charge_statistics(memcg, false, -1);
+		ClearPageCgroupUsed(pc);
+	}
 	unlock_page_cgroup(pc);
 
+	/*
+	 * When called from shmem_replace_page(), in some cases the
+	 * oldpage has already been charged, and in some cases not.
+	 */
+	if (!memcg)
+		return;
+
 	if (PageSwapBacked(oldpage))
 		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
mm/shmem.c

Lines changed: 122 additions & 19 deletions
@@ -103,6 +103,9 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+				struct shmem_inode_info *info, pgoff_t index);
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
 
@@ -604,12 +607,13 @@ static void shmem_evict_inode(struct inode *inode)
  * If swap found in inode, free it and move page from swapcache to filecache.
  */
 static int shmem_unuse_inode(struct shmem_inode_info *info,
-			     swp_entry_t swap, struct page *page)
+			     swp_entry_t swap, struct page **pagep)
 {
 	struct address_space *mapping = info->vfs_inode.i_mapping;
 	void *radswap;
 	pgoff_t index;
-	int error;
+	gfp_t gfp;
+	int error = 0;
 
 	radswap = swp_to_radix_entry(swap);
 	index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +629,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
 
+	gfp = mapping_gfp_mask(mapping);
+	if (shmem_should_replace_page(*pagep, gfp)) {
+		mutex_unlock(&shmem_swaplist_mutex);
+		error = shmem_replace_page(pagep, gfp, info, index);
+		mutex_lock(&shmem_swaplist_mutex);
+		/*
+		 * We needed to drop mutex to make that restrictive page
+		 * allocation; but the inode might already be freed by now,
+		 * and we cannot refer to inode or mapping or info to check.
+		 * However, we do hold page lock on the PageSwapCache page,
+		 * so can check if that still has our reference remaining.
+		 */
+		if (!page_swapcount(*pagep))
+			error = -ENOENT;
+	}
+
 	/*
 	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
 	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
 	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = shmem_add_to_page_cache(page, mapping, index,
+	if (!error)
+		error = shmem_add_to_page_cache(*pagep, mapping, index,
 						GFP_NOWAIT, radswap);
-	/* which does mem_cgroup_uncharge_cache_page on error */
-
 	if (error != -ENOMEM) {
 		/*
 		 * Truncation and eviction use free_swap_and_cache(), which
 		 * only does trylock page: if we raced, best clean up here.
 		 */
-		delete_from_swap_cache(page);
-		set_page_dirty(page);
+		delete_from_swap_cache(*pagep);
+		set_page_dirty(*pagep);
 		if (!error) {
 			spin_lock(&info->lock);
 			info->swapped--;
@@ -660,7 +679,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	struct list_head *this, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
-	int error;
+	int error = 0;
+
+	/*
+	 * There's a faint possibility that swap page was replaced before
+	 * caller locked it: it will come back later with the right page.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		goto out;
 
 	/*
 	 * Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +702,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	list_for_each_safe(this, next, &shmem_swaplist) {
 		info = list_entry(this, struct shmem_inode_info, swaplist);
 		if (info->swapped)
-			found = shmem_unuse_inode(info, swap, page);
+			found = shmem_unuse_inode(info, swap, &page);
 		else
 			list_del_init(&info->swaplist);
 		cond_resched();
@@ -685,8 +711,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-	if (!found)
-		mem_cgroup_uncharge_cache_page(page);
 	if (found < 0)
 		error = found;
 out:
@@ -855,6 +879,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 }
 #endif
 
+/*
+ * When a page is moved from swapcache to shmem filecache (either by the
+ * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
+ * shmem_unuse_inode()), it may have been read in earlier from swap, in
+ * ignorance of the mapping it belongs to.  If that mapping has special
+ * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
+ * we may need to copy to a suitable page before moving to filecache.
+ *
+ * In a future release, this may well be extended to respect cpuset and
+ * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
+ * but for now it is a simple matter of zone.
+ */
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
+{
+	return page_zonenum(page) > gfp_zone(gfp);
+}
+
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+				struct shmem_inode_info *info, pgoff_t index)
+{
+	struct page *oldpage, *newpage;
+	struct address_space *swap_mapping;
+	pgoff_t swap_index;
+	int error;
+
+	oldpage = *pagep;
+	swap_index = page_private(oldpage);
+	swap_mapping = page_mapping(oldpage);
+
+	/*
+	 * We have arrived here because our zones are constrained, so don't
+	 * limit chance of success by further cpuset and node constraints.
+	 */
+	gfp &= ~GFP_CONSTRAINT_MASK;
+	newpage = shmem_alloc_page(gfp, info, index);
+	if (!newpage)
+		return -ENOMEM;
+	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
+
+	*pagep = newpage;
+	page_cache_get(newpage);
+	copy_highpage(newpage, oldpage);
+
+	VM_BUG_ON(!PageLocked(oldpage));
+	__set_page_locked(newpage);
+	VM_BUG_ON(!PageUptodate(oldpage));
+	SetPageUptodate(newpage);
+	VM_BUG_ON(!PageSwapBacked(oldpage));
+	SetPageSwapBacked(newpage);
+	VM_BUG_ON(!swap_index);
+	set_page_private(newpage, swap_index);
+	VM_BUG_ON(!PageSwapCache(oldpage));
+	SetPageSwapCache(newpage);
+
+	/*
+	 * Our caller will very soon move newpage out of swapcache, but it's
+	 * a nice clean interface for us to replace oldpage by newpage there.
+	 */
+	spin_lock_irq(&swap_mapping->tree_lock);
+	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
+								   newpage);
+	__inc_zone_page_state(newpage, NR_FILE_PAGES);
+	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	spin_unlock_irq(&swap_mapping->tree_lock);
+	BUG_ON(error);
+
+	mem_cgroup_replace_page_cache(oldpage, newpage);
+	lru_cache_add_anon(newpage);
+
+	ClearPageSwapCache(oldpage);
+	set_page_private(oldpage, 0);
+
+	unlock_page(oldpage);
+	page_cache_release(oldpage);
+	page_cache_release(oldpage);
+	return 0;
+}
+
 /*
  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
@@ -923,19 +1025,20 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 
 		/* We have to do this with page locked to prevent races */
 		lock_page(page);
+		if (!PageSwapCache(page) || page->mapping) {
+			error = -EEXIST;	/* try again */
+			goto failed;
+		}
 		if (!PageUptodate(page)) {
 			error = -EIO;
 			goto failed;
 		}
 		wait_on_page_writeback(page);
 
-		/* Someone may have already done it for us */
-		if (page->mapping) {
-			if (page->mapping == mapping &&
-			    page->index == index)
-				goto done;
-			error = -EEXIST;
-			goto failed;
+		if (shmem_should_replace_page(page, gfp)) {
+			error = shmem_replace_page(&page, gfp, info, index);
+			if (error)
+				goto failed;
 		}
 
 		error = mem_cgroup_cache_charge(page, current->mm,
@@ -998,7 +1101,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(page);
 	}
-done:
+
 	/* Perhaps the file has been truncated since we checked */
 	if (sgp != SGP_WRITE &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {

mm/swapfile.c

Lines changed: 1 addition & 1 deletion
@@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
  * This does not give an exact answer when swap count is continued,
  * but does include the high COUNT_CONTINUED flag to allow for that.
  */
-static inline int page_swapcount(struct page *page)
+int page_swapcount(struct page *page)
 {
 	int count = 0;
 	struct swap_info_struct *p;
