Skip to content

Commit a9dce66

Browse files
committed
Merge tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull pidfd system call from Christian Brauner: "This introduces the ability to use file descriptors from /proc/<pid>/ as stable handles on struct pid. Even if a pid is recycled the handle will not change. For a start these fds can be used to send signals to the processes they refer to. With the ability to use /proc/<pid> fds as stable handles on struct pid we can fix a long-standing issue where after a process has exited its pid can be reused by another process. If a caller sends a signal to a reused pid it will end up signaling the wrong process. With this patchset we enable a variety of use cases. One obvious example is that we can now safely delegate an important part of process management - sending signals - to processes other than the parent of a given process by sending file descriptors around via scm rights and not fearing that the given process will have been recycled in the meantime. It also allows for easy testing whether a given process is still alive or not by sending signal 0 to a pidfd which is quite handy. There has been some interest in this feature e.g. from systems management (systemd, glibc) and container managers. I have requested and gotten comments from glibc to make sure that this syscall is suitable for their needs as well. In the future I expect it to take on most other pid-based signal syscalls. But such features are left for the future once they are needed. This has been sitting in linux-next for quite a while and has not caused any issues. It comes with selftests which verify basic functionality and also test that a recycled pid cannot be signaled via a pidfd. Jon has written about a prior version of this patchset. 
It should cover the basic functionality since not a lot has changed since then: https://lwn.net/Articles/773459/ The commit message for the syscall itself extensively documents the syscall, including its functionality and extensibility" * tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: selftests: add tests for pidfd_send_signal(); signal: add pidfd_send_signal() syscall
2 parents f67e3fb + 575a0ae commit a9dce66

File tree

11 files changed

+538
-6
lines changed

11 files changed

+538
-6
lines changed

arch/x86/entry/syscalls/syscall_32.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@
429429
421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
430430
422 i386 futex_time64 sys_futex __ia32_sys_futex
431431
423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
432+
424 i386 pidfd_send_signal sys_pidfd_send_signal __ia32_sys_pidfd_send_signal
432433
425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
433434
426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
434435
427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register

arch/x86/entry/syscalls/syscall_64.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@
345345
334 common rseq __x64_sys_rseq
346346
# don't use numbers 387 through 423, add new calls after the last
347347
# 'common' entry
348+
424 common pidfd_send_signal __x64_sys_pidfd_send_signal
348349
425 common io_uring_setup __x64_sys_io_uring_setup
349350
426 common io_uring_enter __x64_sys_io_uring_enter
350351
427 common io_uring_register __x64_sys_io_uring_register

fs/proc/base.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3074,6 +3074,15 @@ static const struct file_operations proc_tgid_base_operations = {
30743074
.llseek = generic_file_llseek,
30753075
};
30763076

3077+
/*
 * tgid_pidfd_to_pid - translate an open file into the struct pid it stands for
 * @file: the open file to inspect
 *
 * A file only qualifies when its dentry is a directory AND its f_op is
 * proc_tgid_base_operations; anything else yields ERR_PTR(-EBADF).
 * On success the result of proc_pid() for the file's inode is returned.
 *
 * NOTE(review): no reference count is visibly taken on the returned pid in
 * this function - confirm the lifetime rules of proc_pid() with callers.
 */
struct pid *tgid_pidfd_to_pid(const struct file *file)
3078+
{
3079+
/* Reject non-directories and files not backed by the tgid base ops. */
if (!d_is_dir(file->f_path.dentry) ||
3080+
(file->f_op != &proc_tgid_base_operations))
3081+
return ERR_PTR(-EBADF);
3082+
3083+
return proc_pid(file_inode(file));
3084+
}
3085+
30773086
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
30783087
{
30793088
return proc_pident_lookup(dir, dentry,

include/linux/proc_fs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
7373
int (*show)(struct seq_file *, void *),
7474
proc_write_t write,
7575
void *data);
76+
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
7677

7778
#else /* CONFIG_PROC_FS */
7879

@@ -114,6 +115,11 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
114115
#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
115116
#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
116117

118+
/*
 * Fallback variant of tgid_pidfd_to_pid(): it never recognises @file as a
 * pidfd and unconditionally returns ERR_PTR(-EBADF).
 *
 * NOTE(review): this sits just before the "#endif" of CONFIG_PROC_FS, so it
 * is presumably the !CONFIG_PROC_FS stub - confirm against the full header.
 */
static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
119+
{
120+
return ERR_PTR(-EBADF);
121+
}
122+
117123
#endif /* CONFIG_PROC_FS */
118124

119125
struct net;

include/linux/syscalls.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,9 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
985985
unsigned mask, struct statx __user *buffer);
986986
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
987987
int flags, uint32_t sig);
988+
asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
989+
siginfo_t __user *info,
990+
unsigned int flags);
988991

989992
/*
990993
* Architecture-specific system calls

include/uapi/asm-generic/unistd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,8 @@ __SYSCALL(__NR_futex_time64, sys_futex)
824824
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
825825
#endif
826826

827+
#define __NR_pidfd_send_signal 424
828+
__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
827829
#define __NR_io_uring_setup 425
828830
__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
829831
#define __NR_io_uring_enter 426

kernel/signal.c

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
#include <linux/sched/task.h>
2020
#include <linux/sched/task_stack.h>
2121
#include <linux/sched/cputime.h>
22+
#include <linux/file.h>
2223
#include <linux/fs.h>
24+
#include <linux/proc_fs.h>
2325
#include <linux/tty.h>
2426
#include <linux/binfmts.h>
2527
#include <linux/coredump.h>
@@ -3487,6 +3489,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
34873489
#endif
34883490
#endif
34893491

3492+
/*
 * prepare_kill_siginfo - fill @info with kernel-generated SI_USER siginfo
 * @sig:  signal number stored in si_signo
 * @info: siginfo to initialise; any previous contents are cleared first
 *
 * The sender identity is taken from the calling task: si_pid is current's
 * tgid as returned by task_tgid_vnr(), si_uid is current's uid mapped
 * through current's user namespace with from_kuid_munged().
 */
static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
3493+
{
3494+
clear_siginfo(info);
3495+
info->si_signo = sig;
3496+
info->si_errno = 0;
3497+
info->si_code = SI_USER;
3498+
info->si_pid = task_tgid_vnr(current);
3499+
info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
3500+
}
3501+
34903502
/**
34913503
* sys_kill - send a signal to a process
34923504
* @pid: the PID of the process
@@ -3496,16 +3508,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
34963508
{
34973509
struct kernel_siginfo info;
34983510

3499-
clear_siginfo(&info);
3500-
info.si_signo = sig;
3501-
info.si_errno = 0;
3502-
info.si_code = SI_USER;
3503-
info.si_pid = task_tgid_vnr(current);
3504-
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
3511+
prepare_kill_siginfo(sig, &info);
35053512

35063513
return kill_something_info(sig, &info, pid);
35073514
}
35083515

3516+
#ifdef CONFIG_PROC_FS
3517+
/*
3518+
* Verify that the signaler and signalee either are in the same pid namespace
3519+
* or that the signaler's pid namespace is an ancestor of the signalee's pid
3520+
* namespace.
3521+
*/
3522+
static bool access_pidfd_pidns(struct pid *pid)
3523+
{
3524+
struct pid_namespace *active = task_active_pid_ns(current);
3525+
struct pid_namespace *p = ns_of_pid(pid);
3526+
3527+
/*
 * Walk from the target's pid namespace up through its parents. Meeting
 * the caller's active namespace means access is allowed; running out of
 * parents (NULL) means the caller's namespace is not on that chain.
 */
for (;;) {
3528+
if (!p)
3529+
return false;
3530+
if (p == active)
3531+
break;
3532+
p = p->parent;
3533+
}
3534+
3535+
return true;
3536+
}
3537+
3538+
/*
 * copy_siginfo_from_user_any - copy a siginfo from userspace into @kinfo
 * @kinfo: kernel siginfo to fill
 * @info:  siginfo pointer supplied by the caller of the syscall
 *
 * When CONFIG_COMPAT is enabled and the current syscall is a compat one,
 * @info is reinterpreted as a struct compat_siginfo and converted with
 * copy_siginfo_from_user32(); otherwise copy_siginfo_from_user() is used.
 * The return value of whichever helper ran is passed through unchanged.
 *
 * NOTE(review): @info is declared without __user here although the caller
 * hands in a "siginfo_t __user *" and the compat cast re-adds __user -
 * looks like a missing sparse annotation; confirm.
 */
static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
3539+
{
3540+
#ifdef CONFIG_COMPAT
3541+
/*
3542+
* Avoid hooking up compat syscalls and instead handle necessary
3543+
* conversions here. Note, this is a stop-gap measure and should not be
3544+
* considered a generic solution.
3545+
*/
3546+
if (in_compat_syscall())
3547+
return copy_siginfo_from_user32(
3548+
kinfo, (struct compat_siginfo __user *)info);
3549+
#endif
3550+
return copy_siginfo_from_user(kinfo, info);
3551+
}
3552+
3553+
/**
3554+
* sys_pidfd_send_signal - send a signal to a process through a task file
3555+
* descriptor
3556+
* @pidfd: the file descriptor of the process
3557+
* @sig: signal to be sent
3558+
* @info: the signal info
3559+
* @flags: future flags to be passed
3560+
*
3561+
* The syscall currently only signals via PIDTYPE_PID which covers
3562+
* kill(<positive-pid>, <signal>). It does not signal threads or process
3563+
* groups.
3564+
* In order to extend the syscall to threads and process groups the @flags
3565+
* argument should be used. In essence, the @flags argument will determine
3566+
* what is signaled and not the file descriptor itself. Put in other words,
3567+
* grouping is a property of the flags argument not a property of the file
3568+
* descriptor.
3569+
*
3570+
* Return: 0 on success, negative errno on failure
3571+
*/
3572+
/*
 * Flow: validate @flags, resolve @pidfd to a struct pid, check the pid
 * namespace relationship, build the siginfo (from userspace or
 * kernel-generated), then deliver via kill_pid_info(). Every path taken
 * after a successful fdget_raw() leaves through the "err" label so the fd
 * reference is dropped exactly once.
 */
SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
3573+
siginfo_t __user *, info, unsigned int, flags)
3574+
{
3575+
int ret;
3576+
struct fd f;
3577+
struct pid *pid;
3578+
kernel_siginfo_t kinfo;
3579+
3580+
/* Enforce flags be set to 0 until we add an extension. */
3581+
if (flags)
3582+
return -EINVAL;
3583+
3584+
f = fdget_raw(pidfd);
3585+
if (!f.file)
3586+
return -EBADF;
3587+
3588+
/* Is this a pidfd? */
3589+
pid = tgid_pidfd_to_pid(f.file);
3590+
if (IS_ERR(pid)) {
3591+
ret = PTR_ERR(pid);
3592+
goto err;
3593+
}
3594+
3595+
/* -EINVAL when the caller's pid namespace cannot reach the target's. */
ret = -EINVAL;
3596+
if (!access_pidfd_pidns(pid))
3597+
goto err;
3598+
3599+
if (info) {
3600+
ret = copy_siginfo_from_user_any(&kinfo, info);
3601+
if (unlikely(ret))
3602+
goto err;
3603+
3604+
/* The signal number inside the siginfo must match @sig. */
ret = -EINVAL;
3605+
if (unlikely(sig != kinfo.si_signo))
3606+
goto err;
3607+
3608+
/*
 * Target is not the caller's own pid and si_code is non-negative or
 * SI_TKILL: anything but SI_USER is refused with -EPERM, while an
 * SI_USER siginfo is discarded and replaced below with
 * kernel-generated contents.
 */
if ((task_pid(current) != pid) &&
3609+
(kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
3610+
/* Only allow sending arbitrary signals to yourself. */
3611+
ret = -EPERM;
3612+
if (kinfo.si_code != SI_USER)
3613+
goto err;
3614+
3615+
/* Turn this into a regular kill signal. */
3616+
prepare_kill_siginfo(sig, &kinfo);
3617+
}
3618+
} else {
3619+
/* No siginfo supplied: build the same siginfo sys_kill() uses. */
prepare_kill_siginfo(sig, &kinfo);
3620+
}
3621+
3622+
ret = kill_pid_info(sig, &kinfo, pid);
3623+
3624+
err:
3625+
/* Shared exit: release the reference taken by fdget_raw(). */
fdput(f);
3626+
return ret;
3627+
}
3628+
#endif /* CONFIG_PROC_FS */
3629+
35093630
static int
35103631
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
35113632
{

kernel/sys_ni.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ COND_SYSCALL(syslog);
168168
/* kernel/sched/core.c */
169169

170170
/* kernel/signal.c */
171+
COND_SYSCALL(pidfd_send_signal);
171172

172173
/* kernel/sys.c */
173174
COND_SYSCALL(setregid);

tools/testing/selftests/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ TARGETS += net
3232
TARGETS += netfilter
3333
TARGETS += networking/timestamping
3434
TARGETS += nsfs
35+
TARGETS += pidfd
3536
TARGETS += powerpc
3637
TARGETS += proc
3738
TARGETS += pstore
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
CFLAGS += -g -I../../../../usr/include/
2+
3+
TEST_GEN_PROGS := pidfd_test
4+
5+
include ../lib.mk
6+

0 commit comments

Comments
 (0)