Skip to content

Commit 9899d11

Browse files
oleg-nesterovtorvalds
authored andcommitted
ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL
putreg() assumes that the tracee is not running and pt_regs_access() can safely play with its stack. However a killed tracee can return from ptrace_stop() to the low-level asm code and do RESTORE_REST, this means that debugger can actually read/modify the kernel stack until the tracee does SAVE_REST again. set_task_blockstep() can race with SIGKILL too and in some sense this race is even worse, the very fact the tracee can be woken up breaks the logic. As Linus suggested we can clear TASK_WAKEKILL around the arch_ptrace() call, this ensures that nobody can ever wakeup the tracee while the debugger looks at it. Not only this fixes the mentioned problems, we can do some cleanups/simplifications in arch_ptrace() paths. Probably ptrace_unfreeze_traced() needs more callers, for example it makes sense to make the tracee killable for oom-killer before access_process_vm(). While at it, add the comment into may_ptrace_stop() to explain why ptrace_stop() still can't rely on SIGKILL and signal_pending_state(). Reported-by: Salman Qazi <sqazi@google.com> Reported-by: Suleiman Souhlal <suleiman@google.com> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 910ffdb commit 9899d11

File tree

3 files changed

+64
-14
lines changed

3 files changed

+64
-14
lines changed

arch/x86/kernel/step.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
165165
* Ensure irq/preemption can't change debugctl in between.
166166
* Note also that both TIF_BLOCKSTEP and debugctl should
167167
* be changed atomically wrt preemption.
168-
* FIXME: this means that set/clear TIF_BLOCKSTEP is simply
169-
* wrong if task != current, SIGKILL can wakeup the stopped
170-
* tracee and set/clear can play with the running task, this
171-
* can confuse the next __switch_to_xtra().
168+
*
169+
* NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
170+
* task is current or it can't be running, otherwise we can race
171+
* with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
172+
* PTRACE_KILL is not safe.
172173
*/
173174
local_irq_disable();
174175
debugctl = get_debugctlmsr();

kernel/ptrace.c

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,40 @@ void __ptrace_unlink(struct task_struct *child)
122122
spin_unlock(&child->sighand->siglock);
123123
}
124124

125+
/* Ensure that nothing can wake it up, even SIGKILL */
126+
static bool ptrace_freeze_traced(struct task_struct *task)
127+
{
128+
bool ret = false;
129+
130+
/* Lockless, nobody but us can set this flag */
131+
if (task->jobctl & JOBCTL_LISTENING)
132+
return ret;
133+
134+
spin_lock_irq(&task->sighand->siglock);
135+
if (task_is_traced(task) && !__fatal_signal_pending(task)) {
136+
task->state = __TASK_TRACED;
137+
ret = true;
138+
}
139+
spin_unlock_irq(&task->sighand->siglock);
140+
141+
return ret;
142+
}
143+
144+
static void ptrace_unfreeze_traced(struct task_struct *task)
145+
{
146+
if (task->state != __TASK_TRACED)
147+
return;
148+
149+
WARN_ON(!task->ptrace || task->parent != current);
150+
151+
spin_lock_irq(&task->sighand->siglock);
152+
if (__fatal_signal_pending(task))
153+
wake_up_state(task, __TASK_TRACED);
154+
else
155+
task->state = TASK_TRACED;
156+
spin_unlock_irq(&task->sighand->siglock);
157+
}
158+
125159
/**
126160
* ptrace_check_attach - check whether ptracee is ready for ptrace operation
127161
* @child: ptracee to check for
@@ -151,24 +185,29 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
151185
* be changed by us so it's not changing right after this.
152186
*/
153187
read_lock(&tasklist_lock);
154-
if ((child->ptrace & PT_PTRACED) && child->parent == current) {
188+
if (child->ptrace && child->parent == current) {
189+
WARN_ON(child->state == __TASK_TRACED);
155190
/*
156191
* child->sighand can't be NULL, release_task()
157192
* does ptrace_unlink() before __exit_signal().
158193
*/
159-
spin_lock_irq(&child->sighand->siglock);
160-
WARN_ON_ONCE(task_is_stopped(child));
161-
if (ignore_state || (task_is_traced(child) &&
162-
!(child->jobctl & JOBCTL_LISTENING)))
194+
if (ignore_state || ptrace_freeze_traced(child))
163195
ret = 0;
164-
spin_unlock_irq(&child->sighand->siglock);
165196
}
166197
read_unlock(&tasklist_lock);
167198

168-
if (!ret && !ignore_state)
169-
ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
199+
if (!ret && !ignore_state) {
200+
if (!wait_task_inactive(child, __TASK_TRACED)) {
201+
/*
202+
* This can only happen if may_ptrace_stop() fails and
203+
* ptrace_stop() changes ->state back to TASK_RUNNING,
204+
* so we should not worry about leaking __TASK_TRACED.
205+
*/
206+
WARN_ON(child->state == __TASK_TRACED);
207+
ret = -ESRCH;
208+
}
209+
}
170210

171-
/* All systems go.. */
172211
return ret;
173212
}
174213

@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
900939
goto out_put_task_struct;
901940

902941
ret = arch_ptrace(child, request, addr, data);
942+
if (ret || request != PTRACE_DETACH)
943+
ptrace_unfreeze_traced(child);
903944

904945
out_put_task_struct:
905946
put_task_struct(child);
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
10391080

10401081
ret = ptrace_check_attach(child, request == PTRACE_KILL ||
10411082
request == PTRACE_INTERRUPT);
1042-
if (!ret)
1083+
if (!ret) {
10431084
ret = compat_arch_ptrace(child, request, addr, data);
1085+
if (ret || request != PTRACE_DETACH)
1086+
ptrace_unfreeze_traced(child);
1087+
}
10441088

10451089
out_put_task_struct:
10461090
put_task_struct(child);

kernel/signal.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,6 +1794,10 @@ static inline int may_ptrace_stop(void)
17941794
* If SIGKILL was already sent before the caller unlocked
17951795
* ->siglock we must see ->core_state != NULL. Otherwise it
17961796
* is safe to enter schedule().
1797+
*
1798+
* This is almost outdated, a task with the pending SIGKILL can't
1799+
* block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
1800+
* after SIGKILL was already dequeued.
17971801
*/
17981802
if (unlikely(current->mm->core_state) &&
17991803
unlikely(current->mm == current->parent->mm))
@@ -1919,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
19191923
if (gstop_done)
19201924
do_notify_parent_cldstop(current, false, why);
19211925

1926+
/* tasklist protects us from ptrace_freeze_traced() */
19221927
__set_current_state(TASK_RUNNING);
19231928
if (clear_code)
19241929
current->exit_code = 0;

0 commit comments

Comments
 (0)