Skip to content

Commit 44a70ad

Browse files
Michal Hocko authored and torvalds committed
mm, oom_adj: make sure processes sharing mm have same view of oom_score_adj
oom_score_adj is shared within a thread group (via struct signal), but this is not sufficient to cover processes sharing an mm (CLONE_VM without CLONE_SIGHAND), and so we can easily end up in a situation where some processes update their oom_score_adj and confuse the oom killer. In the worst case some of those processes might hide from the oom killer altogether via OOM_SCORE_ADJ_MIN while others are eligible. The OOM killer would then pick one of the eligible processes but wouldn't be allowed to kill the others sharing the same mm, so the mm would not be released and its memory would not be freed. It would be ideal to have the oom_score_adj per mm_struct because that is the natural entity the OOM killer considers. But this will not work because some programs are doing vfork(); set_oom_adj(); exec(). We can achieve the same effect though. The oom_score_adj write handler can set the oom_score_adj for all processes sharing the same mm if the task is not in the middle of vfork. As a result all the processes will share the same oom_score_adj. The current implementation is rather pessimistic and checks all the existing processes by default if there is more than 1 holder of the mm, but we do not have any reliable way to check for external users yet. Link: http://lkml.kernel.org/r/1466426628-15074-5-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Oleg Nesterov <oleg@redhat.com> Cc: Vladimir Davydov <vdavydov@virtuozzo.com> Cc: David Rientjes <rientjes@google.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 1d5f0ac commit 44a70ad

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

fs/proc/base.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,7 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
10401040
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
10411041
{
10421042
static DEFINE_MUTEX(oom_adj_mutex);
1043+
struct mm_struct *mm = NULL;
10431044
struct task_struct *task;
10441045
int err = 0;
10451046

@@ -1069,10 +1070,55 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
10691070
}
10701071
}
10711072

1073+
/*
1074+
* Make sure we will check other processes sharing the mm if this is
1075+
* not vfork which wants its own oom_score_adj.
1076+
* pin the mm so it doesn't go away and get reused after task_unlock
1077+
*/
1078+
if (!task->vfork_done) {
1079+
struct task_struct *p = find_lock_task_mm(task);
1080+
1081+
if (p) {
1082+
if (atomic_read(&p->mm->mm_users) > 1) {
1083+
mm = p->mm;
1084+
atomic_inc(&mm->mm_count);
1085+
}
1086+
task_unlock(p);
1087+
}
1088+
}
1089+
10721090
task->signal->oom_score_adj = oom_adj;
10731091
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
10741092
task->signal->oom_score_adj_min = (short)oom_adj;
10751093
trace_oom_score_adj_update(task);
1094+
1095+
if (mm) {
1096+
struct task_struct *p;
1097+
1098+
rcu_read_lock();
1099+
for_each_process(p) {
1100+
if (same_thread_group(task, p))
1101+
continue;
1102+
1103+
/* do not touch kernel threads or the global init */
1104+
if (p->flags & PF_KTHREAD || is_global_init(p))
1105+
continue;
1106+
1107+
task_lock(p);
1108+
if (!p->vfork_done && process_shares_mm(p, mm)) {
1109+
pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
1110+
task_pid_nr(p), p->comm,
1111+
p->signal->oom_score_adj, oom_adj,
1112+
task_pid_nr(task), task->comm);
1113+
p->signal->oom_score_adj = oom_adj;
1114+
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
1115+
p->signal->oom_score_adj_min = (short)oom_adj;
1116+
}
1117+
task_unlock(p);
1118+
}
1119+
rcu_read_unlock();
1120+
mmdrop(mm);
1121+
}
10761122
err_unlock:
10771123
mutex_unlock(&oom_adj_mutex);
10781124
put_task_struct(task);

include/linux/mm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,6 +2284,8 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
22842284
}
22852285
#endif /* __HAVE_ARCH_GATE_AREA */
22862286

2287+
extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
2288+
22872289
#ifdef CONFIG_SYSCTL
22882290
extern int sysctl_drop_caches;
22892291
int drop_caches_sysctl_handler(struct ctl_table *, int,

mm/oom_kill.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ bool oom_killer_disabled __read_mostly;
415415
* task's threads: if one of those is using this mm then this task was also
416416
* using it.
417417
*/
418-
static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
418+
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
419419
{
420420
struct task_struct *t;
421421

0 commit comments

Comments
 (0)