Skip to content

Commit e9714ac

Browse files
koct9i, torvalds
authored and committed
mm: kill vma flag VM_EXECUTABLE and mm->num_exe_file_vmas
Currently the kernel sets mm->exe_file during sys_execve() and then tracks the number of vmas with the VM_EXECUTABLE flag in mm->num_exe_file_vmas; as soon as this counter drops to zero the kernel resets mm->exe_file to NULL. It also resets mm->exe_file at the last mmput(), when mm->mm_users drops to zero.

A VMA with the VM_EXECUTABLE flag appears after mapping a file with the MAP_EXECUTABLE flag. Such vmas can appear only at sys_execve() or after vma splitting, because sys_mmap ignores this flag. Usually the binfmt module sets mm->exe_file and mmaps executable vmas with this file; they hold mm->exe_file while the task is running.

Comment from v2.6.25-6245-g925d1c4 ("procfs task exe symlink"), where all this stuff was introduced:

> The kernel implements readlink of /proc/pid/exe by getting the file from
> the first executable VMA. Then the path to the file is reconstructed and
> reported as the result.
>
> Because of the VMA walk the code is slightly different on nommu systems.
> This patch avoids separate /proc/pid/exe code on nommu systems. Instead of
> walking the VMAs to find the first executable file-backed VMA we store a
> reference to the exec'd file in the mm_struct.
>
> That reference would prevent the filesystem holding the executable file
> from being unmounted even after unmapping the VMAs. So we track the number
> of VM_EXECUTABLE VMAs and drop the new reference when the last one is
> unmapped. This avoids pinning the mounted filesystem.

exe_file's vma accounting is hooked into every file mmap/munmap and vma split/merge just to fix a hypothetical case: a filesystem being pinned against unmounting by an mm which has already unmapped all its executable files but is still alive.

It seems that currently nobody depends on this behaviour. We can try to remove this logic and keep mm->exe_file until the final mmput().

mm->exe_file is still protected with mm->mmap_sem, because we want to change it via the new sys_prctl(PR_SET_MM_EXE_FILE). Also, via this syscall a task can change its mm->exe_file and unpin the mountpoint explicitly.
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 2dd8ad8 commit e9714ac

File tree

6 files changed

+5
-58
lines changed

6 files changed

+5
-58
lines changed

include/linux/mm.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ extern unsigned int kobjsize(const void *objp);
8787
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
8888
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
8989

90-
#define VM_EXECUTABLE 0x00001000
9190
#define VM_LOCKED 0x00002000
9291
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
9392

@@ -1396,9 +1395,6 @@ extern void exit_mmap(struct mm_struct *);
13961395
extern int mm_take_all_locks(struct mm_struct *mm);
13971396
extern void mm_drop_all_locks(struct mm_struct *mm);
13981397

1399-
/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
1400-
extern void added_exe_file_vma(struct mm_struct *mm);
1401-
extern void removed_exe_file_vma(struct mm_struct *mm);
14021398
extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
14031399
extern struct file *get_mm_exe_file(struct mm_struct *mm);
14041400

include/linux/mm_types.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,6 @@ struct mm_struct {
394394

395395
/* store ref to file /proc/<pid>/exe symlink points to */
396396
struct file *exe_file;
397-
unsigned long num_exe_file_vmas;
398397
#ifdef CONFIG_MMU_NOTIFIER
399398
struct mmu_notifier_mm *mmu_notifier_mm;
400399
#endif

include/linux/mman.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ calc_vm_flag_bits(unsigned long flags)
8686
{
8787
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
8888
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
89-
_calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
9089
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
9190
}
9291
#endif /* __KERNEL__ */

kernel/fork.c

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -622,34 +622,13 @@ void mmput(struct mm_struct *mm)
622622
}
623623
EXPORT_SYMBOL_GPL(mmput);
624624

625-
/*
626-
* We added or removed a vma mapping the executable. The vmas are only mapped
627-
* during exec and are not mapped with the mmap system call.
628-
* Callers must hold down_write() on the mm's mmap_sem for these
629-
*/
630-
void added_exe_file_vma(struct mm_struct *mm)
631-
{
632-
mm->num_exe_file_vmas++;
633-
}
634-
635-
void removed_exe_file_vma(struct mm_struct *mm)
636-
{
637-
mm->num_exe_file_vmas--;
638-
if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
639-
fput(mm->exe_file);
640-
mm->exe_file = NULL;
641-
}
642-
643-
}
644-
645625
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
646626
{
647627
if (new_exe_file)
648628
get_file(new_exe_file);
649629
if (mm->exe_file)
650630
fput(mm->exe_file);
651631
mm->exe_file = new_exe_file;
652-
mm->num_exe_file_vmas = 0;
653632
}
654633

655634
struct file *get_mm_exe_file(struct mm_struct *mm)

mm/mmap.c

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -231,11 +231,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
231231
might_sleep();
232232
if (vma->vm_ops && vma->vm_ops->close)
233233
vma->vm_ops->close(vma);
234-
if (vma->vm_file) {
234+
if (vma->vm_file)
235235
fput(vma->vm_file);
236-
if (vma->vm_flags & VM_EXECUTABLE)
237-
removed_exe_file_vma(vma->vm_mm);
238-
}
239236
mpol_put(vma_policy(vma));
240237
kmem_cache_free(vm_area_cachep, vma);
241238
return next;
@@ -636,8 +633,6 @@ again: remove_next = 1 + (end > next->vm_end);
636633
if (file) {
637634
uprobe_munmap(next, next->vm_start, next->vm_end);
638635
fput(file);
639-
if (next->vm_flags & VM_EXECUTABLE)
640-
removed_exe_file_vma(mm);
641636
}
642637
if (next->anon_vma)
643638
anon_vma_merge(vma, next);
@@ -1304,8 +1299,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
13041299
error = file->f_op->mmap(file, vma);
13051300
if (error)
13061301
goto unmap_and_free_vma;
1307-
if (vm_flags & VM_EXECUTABLE)
1308-
added_exe_file_vma(mm);
13091302

13101303
/* Can addr have changed??
13111304
*
@@ -1987,11 +1980,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
19871980
if (anon_vma_clone(new, vma))
19881981
goto out_free_mpol;
19891982

1990-
if (new->vm_file) {
1983+
if (new->vm_file)
19911984
get_file(new->vm_file);
1992-
if (vma->vm_flags & VM_EXECUTABLE)
1993-
added_exe_file_vma(mm);
1994-
}
19951985

19961986
if (new->vm_ops && new->vm_ops->open)
19971987
new->vm_ops->open(new);
@@ -2009,11 +1999,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
20091999
/* Clean everything up if vma_adjust failed. */
20102000
if (new->vm_ops && new->vm_ops->close)
20112001
new->vm_ops->close(new);
2012-
if (new->vm_file) {
2013-
if (vma->vm_flags & VM_EXECUTABLE)
2014-
removed_exe_file_vma(mm);
2002+
if (new->vm_file)
20152003
fput(new->vm_file);
2016-
}
20172004
unlink_anon_vmas(new);
20182005
out_free_mpol:
20192006
mpol_put(pol);
@@ -2408,12 +2395,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
24082395
new_vma->vm_start = addr;
24092396
new_vma->vm_end = addr + len;
24102397
new_vma->vm_pgoff = pgoff;
2411-
if (new_vma->vm_file) {
2398+
if (new_vma->vm_file)
24122399
get_file(new_vma->vm_file);
2413-
2414-
if (vma->vm_flags & VM_EXECUTABLE)
2415-
added_exe_file_vma(mm);
2416-
}
24172400
if (new_vma->vm_ops && new_vma->vm_ops->open)
24182401
new_vma->vm_ops->open(new_vma);
24192402
vma_link(mm, new_vma, prev, rb_link, rb_parent);

mm/nommu.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -789,11 +789,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
789789
kenter("%p", vma);
790790
if (vma->vm_ops && vma->vm_ops->close)
791791
vma->vm_ops->close(vma);
792-
if (vma->vm_file) {
792+
if (vma->vm_file)
793793
fput(vma->vm_file);
794-
if (vma->vm_flags & VM_EXECUTABLE)
795-
removed_exe_file_vma(mm);
796-
}
797794
put_nommu_region(vma->vm_region);
798795
kmem_cache_free(vm_area_cachep, vma);
799796
}
@@ -1284,10 +1281,6 @@ unsigned long do_mmap_pgoff(struct file *file,
12841281
if (file) {
12851282
region->vm_file = get_file(file);
12861283
vma->vm_file = get_file(file);
1287-
if (vm_flags & VM_EXECUTABLE) {
1288-
added_exe_file_vma(current->mm);
1289-
vma->vm_mm = current->mm;
1290-
}
12911284
}
12921285

12931286
down_write(&nommu_region_sem);
@@ -1440,8 +1433,6 @@ unsigned long do_mmap_pgoff(struct file *file,
14401433
kmem_cache_free(vm_region_jar, region);
14411434
if (vma->vm_file)
14421435
fput(vma->vm_file);
1443-
if (vma->vm_flags & VM_EXECUTABLE)
1444-
removed_exe_file_vma(vma->vm_mm);
14451436
kmem_cache_free(vm_area_cachep, vma);
14461437
kleave(" = %d", ret);
14471438
return ret;

0 commit comments

Comments
 (0)