Commit 6dfc889

Ingo Molnar authored and Linus Torvalds committed
[PATCH] shared thread signals
Support POSIX compliant thread signals on a kernel level with usable debugging (broadcast SIGSTOP, SIGCONT) and thread group management (broadcast SIGKILL), plus load-balancing of 'process' signals between threads for better signal performance.

Changes:

- POSIX thread semantics for signals

  there are 7 'types' of actions a signal can take: specific, load-balance, kill-all, kill-all+core, stop-all, continue-all and ignore. Depending on the POSIX specifications each signal has one of the types defined for both the 'handler defined' and the 'handler not defined (kernel default)' case. Here is the table:

   ----------------------------------------------------------
   |                    |  userspace    |  kernel            |
   ----------------------------------------------------------
   |  SIGHUP            |  load-balance |  kill-all          |
   |  SIGINT            |  load-balance |  kill-all          |
   |  SIGQUIT           |  load-balance |  kill-all+core     |
   |  SIGILL            |  specific     |  kill-all+core     |
   |  SIGTRAP           |  specific     |  kill-all+core     |
   |  SIGABRT/SIGIOT    |  specific     |  kill-all+core     |
   |  SIGBUS            |  specific     |  kill-all+core     |
   |  SIGFPE            |  specific     |  kill-all+core     |
   |  SIGKILL           |  n/a          |  kill-all          |
   |  SIGUSR1           |  load-balance |  kill-all          |
   |  SIGSEGV           |  specific     |  kill-all+core     |
   |  SIGUSR2           |  load-balance |  kill-all          |
   |  SIGPIPE           |  specific     |  kill-all          |
   |  SIGALRM           |  load-balance |  kill-all          |
   |  SIGTERM           |  load-balance |  kill-all          |
   |  SIGCHLD           |  load-balance |  ignore            |
   |  SIGCONT           |  load-balance |  continue-all      |
   |  SIGSTOP           |  n/a          |  stop-all          |
   |  SIGTSTP           |  load-balance |  stop-all          |
   |  SIGTTIN           |  load-balance |  stop-all          |
   |  SIGTTOU           |  load-balance |  stop-all          |
   |  SIGURG            |  load-balance |  ignore            |
   |  SIGXCPU           |  specific     |  kill-all+core     |
   |  SIGXFSZ           |  specific     |  kill-all+core     |
   |  SIGVTALRM         |  load-balance |  kill-all          |
   |  SIGPROF           |  specific     |  kill-all          |
   |  SIGPOLL/SIGIO     |  load-balance |  kill-all          |
   |  SIGSYS/SIGUNUSED  |  specific     |  kill-all+core     |
   |  SIGSTKFLT         |  specific     |  kill-all          |
   |  SIGWINCH          |  load-balance |  ignore            |
   |  SIGPWR            |  load-balance |  kill-all          |
   |  SIGRTMIN-SIGRTMAX |  load-balance |  kill-all          |
   ----------------------------------------------------------

  as you can see from the list, signals that have handlers defined never get broadcast - they are either specific or load-balanced.

- CLONE_THREAD implies CLONE_SIGHAND

  It does not make much sense to have a thread group that does not share signal handlers. In fact in the patch i'm using the signal spinlock to lock access to the thread group. I made the siglock IRQ-safe, thus we can load-balance signals from interrupt contexts as well. (we cannot take the tasklist lock in write mode from IRQ handlers.) this is not as clean as i'd like it to be, but it's the best i could come up with so far.

- thread group list management reworked

  threads are now removed from the group when the thread is unhashed from the PID table. This makes the most sense. This also helps with another feature that relies on an intact thread group list: multithreaded coredumps.

- child reparenting reworked

  the O(N) algorithm in forget_original_parent() causes massive performance problems if a large number of threads exit from the group. Performance improves more than 10-fold if the following simple rules are followed instead:

    - reparent children to the *previous* thread [exiting or not]
    - if a thread is detached then reparent to init.

- fast broadcasting of kernel-internal SIGSTOP, SIGCONT, SIGKILL, etc.

  kernel-internal broadcast signals are a potential DoS problem, since they might generate massive amounts of GFP_ATOMIC allocations of siginfo structures. The important thing to note is that the siginfo structure does not actually have to be allocated and queued - the signal processing code has all the information it needs, and neither of these signals carries any information in the siginfo structure. This makes a broadcast SIGKILL a very simple operation: all threads get bit 9 set in their pending bitmask. The speedup due to this was significant - and the robustness win is invaluable. (a minimal sketch of this operation follows after this message.)

- sys_execve() should not kill off 'all other' threads

  the 'exec kills all threads if the master thread does the exec()' rule is a POSIX(-ish) thing that should not be hardcoded in the kernel in this case. to handle POSIX exec() semantics, glibc uses a special syscall that kills 'all but self' threads: sys_exit_allbutself(). the straightforward exec() implementation just calls sys_exit_allbutself() and then sys_execve(). (this syscall is also used internally if the thread group leader thread sys_exit()s or sys_exec()s, to ensure the integrity of the thread group.)
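For illustration, here is a minimal sketch of what the fast broadcast described above amounts to. The helper name __broadcast_fatal_sig() is an assumption for this sketch (the real delivery code lives in kernel/signal.c and is not part of this diff view), but the operations it uses - next_thread(), sigaddset(), set_tsk_thread_flag() - are the ones this patch works with:

/*
 * Hedged sketch: broadcast a kernel-internal fatal signal (e.g. SIGKILL)
 * to every thread in the group without allocating or queueing any siginfo
 * structures.  Assumes the caller already holds the group's siglock (or
 * the tasklist_lock), which next_thread() checks for.  A real version
 * would also wake each target up; that is omitted here for brevity.
 */
static void __broadcast_fatal_sig(struct task_struct *leader, int sig)
{
        struct task_struct *t = leader;

        do {
                sigaddset(&t->pending.signal, sig);     /* just set the pending bit */
                set_tsk_thread_flag(t, TIF_SIGPENDING); /* and make the thread notice */
                t = next_thread(t);
        } while (t != leader);
}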
1 parent 3678024 · commit 6dfc889

7 files changed, 491 insertions(+), 193 deletions(-)

fs/exec.c

Lines changed: 8 additions & 39 deletions
@@ -504,6 +504,8 @@ static inline int make_private_signals(void)
 {
        struct signal_struct * newsig;
 
+       remove_thread_group(current, current->sig);
+
        if (atomic_read(&current->sig->count) <= 1)
                return 0;
        newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
@@ -575,42 +577,10 @@ static inline void flush_old_files(struct files_struct * files)
  */
 static void de_thread(struct task_struct *tsk)
 {
-       struct task_struct *sub;
-       struct list_head *head, *ptr;
-       struct siginfo info;
-       int pause;
-
-       write_lock_irq(&tasklist_lock);
-
-       if (tsk->tgid != tsk->pid) {
-               /* subsidiary thread - just escapes the group */
-               list_del_init(&tsk->thread_group);
-               tsk->tgid = tsk->pid;
-               pause = 0;
-       }
-       else {
-               /* master thread - kill all subsidiary threads */
-               info.si_signo = SIGKILL;
-               info.si_errno = 0;
-               info.si_code = SI_DETHREAD;
-               info.si_pid = current->pid;
-               info.si_uid = current->uid;
-
-               head = tsk->thread_group.next;
-               list_del_init(&tsk->thread_group);
-
-               list_for_each(ptr,head) {
-                       sub = list_entry(ptr,struct task_struct,thread_group);
-                       send_sig_info(SIGKILL,&info,sub);
-               }
-
-               pause = 1;
-       }
-
-       write_unlock_irq(&tasklist_lock);
-
-       /* give the subsidiary threads a chance to clean themselves up */
-       if (pause) yield();
+       if (!list_empty(&tsk->thread_group))
+               BUG();
+       /* An exec() starts a new thread group: */
+       tsk->tgid = tsk->pid;
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -633,6 +603,8 @@ int flush_old_exec(struct linux_binprm * bprm)
        if (retval) goto mmap_failed;
 
        /* This is the point of no return */
+       de_thread(current);
+
        release_old_signals(oldsig);
 
        current->sas_ss_sp = current->sas_ss_size = 0;
@@ -651,9 +623,6 @@ int flush_old_exec(struct linux_binprm * bprm)
 
        flush_thread();
 
-       if (!list_empty(&current->thread_group))
-               de_thread(current);
-
        if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
            permission(bprm->file->f_dentry->d_inode,MAY_READ))
                current->mm->dumpable = 0;
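The commit message describes the intended userspace side of these exec() changes: glibc first kills 'all but self' threads via sys_exit_allbutself() and only then calls execve(). The following is a purely illustrative userspace sketch of that sequencing; exit_allbutself() is a do-nothing stand-in for a glibc wrapper around that syscall, and posix_execve() is an invented name:

/*
 * Illustrative userspace sketch only.  exit_allbutself() is a stub standing
 * in for a glibc wrapper around sys_exit_allbutself(); the wrapper and the
 * posix_execve() name are hypothetical, the sequencing is from the commit
 * message above.
 */
#include <unistd.h>

static void exit_allbutself(void)
{
        /* stub: the real wrapper would kill every other thread in the group */
}

static int posix_execve(const char *path, char *const argv[], char *const envp[])
{
        exit_allbutself();                      /* POSIX: exec replaces the whole group */
        return execve(path, argv, envp);        /* only the calling thread survives */
}

int main(void)
{
        char *argv[] = { "true", NULL };
        char *envp[] = { NULL };

        return posix_execve("/bin/true", argv, envp);
}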

include/asm-i386/spinlock.h

Lines changed: 2 additions & 0 deletions
@@ -158,6 +158,8 @@ typedef struct {
 
 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
 
+#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
+
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
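The new macro is a cheap "is this rwlock currently held" test meant for debugging assertions, and the sched.h changes below use it exactly that way in next_thread()/prev_thread(). A minimal sketch of the same pattern as a standalone helper (the helper name is illustrative, not part of the patch):

/*
 * Sketch: assert that the tasklist_lock is held, in the same style as the
 * checks this patch adds to next_thread()/prev_thread().  Illustrative
 * helper name; not code from the patch.
 */
static inline void assert_tasklist_lock_held(void)
{
#if CONFIG_SMP
        if (!rwlock_is_locked(&tasklist_lock))
                BUG();
#endif
}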

include/linux/sched.h

Lines changed: 34 additions & 19 deletions
@@ -211,6 +211,11 @@ struct signal_struct {
        atomic_t count;
        struct k_sigaction action[_NSIG];
        spinlock_t siglock;
+
+       /* current thread group signal load-balancing target: */
+       task_t *curr_target;
+
+       struct sigpending shared_pending;
 };
 
 /*
@@ -356,7 +361,7 @@ struct task_struct {
        spinlock_t sigmask_lock;        /* Protects signal and blocked */
        struct signal_struct *sig;
 
-       sigset_t blocked;
+       sigset_t blocked, real_blocked, shared_unblocked;
        struct sigpending pending;
 
        unsigned long sas_ss_sp;
@@ -431,6 +436,7 @@ extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
 extern void set_user_nice(task_t *p, long nice);
 extern int task_prio(task_t *p);
 extern int task_nice(task_t *p);
+extern int task_curr(task_t *p);
 extern int idle_cpu(int cpu);
 
 void yield(void);
@@ -535,7 +541,7 @@ extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *);
 extern void sig_exit(int, int, struct siginfo *);
-extern int dequeue_signal(sigset_t *, siginfo_t *);
+extern int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info);
 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
                              sigset_t *mask);
 extern void unblock_all_signals(void);
@@ -654,6 +660,7 @@ extern void exit_thread(void);
 extern void exit_mm(struct task_struct *);
 extern void exit_files(struct task_struct *);
 extern void exit_sighand(struct task_struct *);
+extern void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig);
 
 extern void reparent_to_init(void);
 extern void daemonize(void);
@@ -786,8 +793,29 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
 #define for_each_thread(task) \
        for (task = next_thread(current) ; task != current ; task = next_thread(task))
 
-#define next_thread(p) \
-       list_entry((p)->thread_group.next, struct task_struct, thread_group)
+static inline task_t *next_thread(task_t *p)
+{
+       if (!p->sig)
+               BUG();
+#if CONFIG_SMP
+       if (!spin_is_locked(&p->sig->siglock) &&
+                       !rwlock_is_locked(&tasklist_lock))
+               BUG();
+#endif
+       return list_entry((p)->thread_group.next, task_t, thread_group);
+}
+
+static inline task_t *prev_thread(task_t *p)
+{
+       if (!p->sig)
+               BUG();
+#if CONFIG_SMP
+       if (!spin_is_locked(&p->sig->siglock) &&
+                       !rwlock_is_locked(&tasklist_lock))
+               BUG();
+#endif
+       return list_entry((p)->thread_group.prev, task_t, thread_group);
+}
 
 #define thread_group_leader(p) (p->pid == p->tgid)
 
@@ -903,21 +931,8 @@ static inline void cond_resched(void)
    This is required every time the blocked sigset_t changes.
    Athread cathreaders should have t->sigmask_lock. */
 
-static inline void recalc_sigpending_tsk(struct task_struct *t)
-{
-       if (has_pending_signals(&t->pending.signal, &t->blocked))
-               set_tsk_thread_flag(t, TIF_SIGPENDING);
-       else
-               clear_tsk_thread_flag(t, TIF_SIGPENDING);
-}
-
-static inline void recalc_sigpending(void)
-{
-       if (has_pending_signals(&current->pending.signal, &current->blocked))
-               set_thread_flag(TIF_SIGPENDING);
-       else
-               clear_thread_flag(TIF_SIGPENDING);
-}
+extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
+extern void recalc_sigpending(void);
 
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
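The new curr_target and shared_pending fields carry the per-group load-balancing state described in the commit message. The selection logic itself lives in kernel/signal.c, which is not part of this diff view; the following is a hedged sketch of how such a selection could use curr_target, next_thread() and the new task_curr() helper (the function name and the exact policy are assumptions, not the patch's actual code):

/*
 * Hedged sketch of picking a delivery target for a load-balanced 'process'
 * signal.  The real code is in kernel/signal.c (not shown in this diff);
 * the helper name and policy here are illustrative.  Caller must hold
 * sig->siglock, which next_thread() checks for.
 */
static task_t *pick_signal_target(struct signal_struct *sig, task_t *any_thread)
{
        task_t *t = sig->curr_target;

        if (!t)
                t = sig->curr_target = any_thread;

        /* rotate through the group, preferring a thread running right now */
        do {
                if (!signal_pending(t) && task_curr(t))
                        break;
                t = next_thread(t);
        } while (t != sig->curr_target);

        sig->curr_target = t;
        return t;
}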

kernel/exit.c

Lines changed: 12 additions & 13 deletions
@@ -36,7 +36,6 @@ static inline void __unhash_process(struct task_struct *p)
        nr_threads--;
        unhash_pid(p);
        REMOVE_LINKS(p);
-       list_del(&p->thread_group);
        p->pid = 0;
        proc_dentry = p->proc_dentry;
        if (unlikely(proc_dentry != NULL)) {
@@ -73,6 +72,7 @@ static void release_task(struct task_struct * p)
        }
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
        unhash_process(p);
+       exit_sighand(p);
 
        release_thread(p);
        if (p != current) {
@@ -244,7 +244,8 @@ void daemonize(void)
 static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper)
 {
        /* We dont want people slaying init */
-       p->exit_signal = SIGCHLD;
+       if (p->exit_signal != -1)
+               p->exit_signal = SIGCHLD;
        p->self_exec_id++;
 
        /* Make sure we're not reparenting to ourselves */
@@ -412,18 +413,15 @@ void exit_mm(struct task_struct *tsk)
  */
 static inline void forget_original_parent(struct task_struct * father)
 {
-       struct task_struct *p, *reaper;
+       struct task_struct *p, *reaper = father;
        struct list_head *_p;
 
-       read_lock(&tasklist_lock);
+       write_lock_irq(&tasklist_lock);
 
-       /* Next in our thread group, if they're not already exiting */
-       reaper = father;
-       do {
-               reaper = next_thread(reaper);
-               if (!(reaper->flags & PF_EXITING))
-                       break;
-       } while (reaper != father);
+       if (father->exit_signal != -1)
+               reaper = prev_thread(reaper);
+       else
+               reaper = child_reaper;
 
        if (reaper == father)
                reaper = child_reaper;
@@ -444,7 +442,7 @@ static inline void forget_original_parent(struct task_struct * father)
                p = list_entry(_p,struct task_struct,ptrace_list);
                reparent_thread(p, reaper, child_reaper);
        }
-       read_unlock(&tasklist_lock);
+       write_unlock_irq(&tasklist_lock);
 }
 
 static inline void zap_thread(task_t *p, task_t *father, int traced)
@@ -604,7 +602,6 @@ NORET_TYPE void do_exit(long code)
        __exit_files(tsk);
        __exit_fs(tsk);
        exit_namespace(tsk);
-       exit_sighand(tsk);
        exit_thread();
 
        if (current->leader)
@@ -763,6 +760,8 @@ asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struc
                if (options & __WNOTHREAD)
                        break;
                tsk = next_thread(tsk);
+               if (tsk->sig != current->sig)
+                       BUG();
        } while (tsk != current);
        read_unlock(&tasklist_lock);
        if (flag) {
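The forget_original_parent() change above implements the O(1) reparenting rule from the commit message. As a condensed restatement (the helper name is illustrative; the checks mirror the new code, and the caller is assumed to hold the tasklist_lock, as forget_original_parent() does):

/*
 * Sketch: choose the new parent for the children of an exiting thread,
 * per the rule in the commit message.  Illustrative helper only; assumes
 * the tasklist_lock is held (prev_thread() checks for that).
 */
static inline task_t *choose_new_parent(task_t *father)
{
        task_t *reaper;

        if (father->exit_signal == -1)  /* detached thread: hand children to init */
                return child_reaper;

        reaper = prev_thread(father);   /* previous thread in the group, exiting or not */
        if (reaper == father)           /* no other thread left: fall back to init */
                reaper = child_reaper;
        return reaper;
}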

kernel/fork.c

Lines changed: 11 additions & 0 deletions
@@ -630,6 +630,9 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
        spin_lock_init(&sig->siglock);
        atomic_set(&sig->count, 1);
        memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
+       sig->curr_target = NULL;
+       init_sigpending(&sig->shared_pending);
+
        return 0;
 }
 
@@ -664,6 +667,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
 
+       /*
+        * Thread groups must share signals as well:
+        */
+       if (clone_flags & CLONE_THREAD)
+               clone_flags |= CLONE_SIGHAND;
+
        retval = security_ops->task_create(clone_flags);
        if (retval)
                goto fork_out;
@@ -843,8 +852,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->parent = p->real_parent;
 
        if (clone_flags & CLONE_THREAD) {
+               spin_lock(&current->sig->siglock);
                p->tgid = current->tgid;
                list_add(&p->thread_group, &current->thread_group);
+               spin_unlock(&current->sig->siglock);
        }
 
        SET_LINKS(p);
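Since the kernel now forces CLONE_SIGHAND whenever CLONE_THREAD is requested, a thread created from userspace always shares signal handlers with its group. A hedged userspace illustration of creating such a thread with clone(2) (generic usage sketch, not code from this patch):

/*
 * Hedged userspace illustration: create a thread in the caller's thread
 * group.  With this patch, CLONE_THREAD implies CLONE_SIGHAND in the
 * kernel, so the two flags always travel together.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define STACK_SIZE (64 * 1024)

static int worker(void *arg)
{
        printf("worker thread in group of pid %d\n", getpid());
        return 0;
}

int main(void)
{
        char *stack = malloc(STACK_SIZE);
        int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD;

        if (!stack)
                return 1;
        /* the stack grows down on x86, so pass the top of the buffer */
        if (clone(worker, stack + STACK_SIZE, flags, NULL) == -1) {
                perror("clone");
                return 1;
        }
        sleep(1);       /* crude: give the worker a moment before the group exits */
        return 0;
}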

kernel/sched.c

Lines changed: 9 additions & 0 deletions
@@ -1335,6 +1335,15 @@ int task_nice(task_t *p)
        return TASK_NICE(p);
 }
 
+/**
+ * task_curr - is this task currently executing on a CPU?
+ * @p: the task in question.
+ */
+int task_curr(task_t *p)
+{
+       return cpu_curr(task_cpu(p)) == p;
+}
+
 /**
  * idle_cpu - is a given cpu idle currently?
  * @cpu: the processor in question.
