Commit a0fa1dd

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:

 - Add the initial implementation of SCHED_DEADLINE support: a
   real-time scheduling policy where tasks that meet their deadlines
   and periodically execute their instances in less than their runtime
   quota see real-time scheduling and won't miss any of their
   deadlines. Tasks that go over their quota get delayed (Available to
   privileged users for now)

 - Clean up and fix preempt_enable_no_resched() abuse all around the tree

 - Do sched_clock() performance optimizations on x86 and elsewhere

 - Fix and improve auto-NUMA balancing

 - Fix and clean up the idle loop

 - Apply various cleanups and fixes

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  sched: Fix __sched_setscheduler() nice test
  sched: Move SCHED_RESET_ON_FORK into attr::sched_flags
  sched: Fix up attr::sched_priority warning
  sched: Fix up scheduler syscall LTP fails
  sched: Preserve the nice level over sched_setscheduler() and sched_setparam() calls
  sched/core: Fix htmldocs warnings
  sched/deadline: No need to check p if dl_se is valid
  sched/deadline: Remove unused variables
  sched/deadline: Fix sparse static warnings
  m68k: Fix build warning in mac_via.h
  sched, thermal: Clean up preempt_enable_no_resched() abuse
  sched, net: Fixup busy_loop_us_clock()
  sched, net: Clean up preempt_enable_no_resched() abuse
  sched/preempt: Fix up missed PREEMPT_NEED_RESCHED folding
  sched/preempt, locking: Rework local_bh_{dis,en}able()
  sched/clock, x86: Avoid a runtime condition in native_sched_clock()
  sched/clock: Fix up clear_sched_clock_stable()
  sched/clock, x86: Use a static_key for sched_clock_stable
  sched/clock: Remove local_irq_disable() from the clocks
  sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs
  ...
2 parents 9326657 + eaad451 commit a0fa1dd

63 files changed: +3775 −626 lines
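
The headline feature, SCHED_DEADLINE, is reached through the new sched_setattr()/sched_getattr() syscalls wired up in the ARM diffs below. As a hedged sketch (not part of this commit), a privileged task could request the policy roughly as follows; the struct layout mirrors the uapi header introduced by this merge, the raw syscall number 380 is the ARM assignment visible below and differs per architecture, and glibc provides no wrapper at this point:

/* Hedged sketch: requesting SCHED_DEADLINE from userspace (illustrative). */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define SCHED_DEADLINE		6	/* policy value added by this merge */
#define NR_sched_setattr	380	/* ARM number from the table below; varies per arch */

struct sched_attr {			/* mirrors include/uapi/linux/sched.h */
	uint32_t size;			/* sizeof(struct sched_attr) */
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL/SCHED_BATCH only */
	uint32_t sched_priority;	/* SCHED_FIFO/SCHED_RR only */
	uint64_t sched_runtime;		/* ns of budget per period */
	uint64_t sched_deadline;	/* ns, relative deadline */
	uint64_t sched_period;		/* ns between activations */
};

int main(void)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_runtime	=  10 * 1000 * 1000,	/* run 10ms...       */
		.sched_deadline	=  30 * 1000 * 1000,	/* ...within 30ms... */
		.sched_period	= 100 * 1000 * 1000,	/* ...every 100ms    */
	};

	if (syscall(NR_sched_setattr, 0 /* self */, &attr, 0 /* flags */)) {
		perror("sched_setattr");	/* EPERM unless privileged */
		return 1;
	}
	/* periodic real-time work would run here under the deadline policy */
	return 0;
}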

Documentation/sysctl/kernel.txt

Lines changed: 0 additions & 5 deletions
@@ -428,11 +428,6 @@ rate for each task.
 numa_balancing_scan_size_mb is how many megabytes worth of pages are
 scanned for a given scan.
 
-numa_balancing_settle_count is how many scan periods must complete before
-the schedule balancer stops pushing the task towards a preferred node. This
-gives the scheduler a chance to place the task on an alternative node if the
-preferred node is overloaded.
-
 numa_balancing_migrate_deferred is how many page migrations get skipped
 unconditionally, after a page migration is skipped because a page is shared
 with other tasks. This reduces page migration overhead, and determines

arch/arm/include/asm/unistd.h

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (380)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64

arch/arm/include/uapi/asm/unistd.h

Lines changed: 2 additions & 0 deletions
@@ -406,6 +406,8 @@
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
 #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
+#define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to

arch/arm/kernel/calls.S

Lines changed: 2 additions & 0 deletions
@@ -389,6 +389,8 @@
 		CALL(sys_process_vm_writev)
 		CALL(sys_kcmp)
 		CALL(sys_finit_module)
+/* 380 */	CALL(sys_sched_setattr)
+		CALL(sys_sched_getattr)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

arch/m68k/include/asm/mac_via.h

Lines changed: 2 additions & 0 deletions
@@ -254,6 +254,8 @@
 extern volatile __u8 *via1,*via2;
 extern int rbv_present,via_alt_mapping;
 
+struct irq_desc;
+
 extern void via_register_interrupts(void);
 extern void via_irq_enable(int);
 extern void via_irq_disable(int);

arch/x86/include/asm/mwait.h

Lines changed: 43 additions & 0 deletions
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_MWAIT_H
 #define _ASM_X86_MWAIT_H
 
+#include <linux/sched.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
@@ -13,4 +15,45 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 
+static inline void __monitor(const void *eax, unsigned long ecx,
+			     unsigned long edx)
+{
+	/* "monitor %eax, %ecx, %edx;" */
+	asm volatile(".byte 0x0f, 0x01, 0xc8;"
+		     :: "a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax, %ecx;" */
+	asm volatile(".byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+}
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+{
+	if (!current_set_polling_and_test()) {
+		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+			mb();
+			clflush((void *)&current_thread_info()->flags);
+			mb();
+		}
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__mwait(eax, ecx);
+	}
+	current_clr_polling();
+}
+
 #endif /* _ASM_X86_MWAIT_H */
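
For orientation, a minimal sketch of a caller of the relocated helper; the 0x10 C-state hint is illustrative only (not taken from this commit), while MWAIT_ECX_INTERRUPT_BREAK is the flag defined above that lets a pending interrupt wake the CPU out of MWAIT even with IRQs masked:

/* Hypothetical caller; the eax hint value 0x10 is an assumption. */
static void example_idle_enter(void)
{
	/* eax = C-state hint, ecx = wake on interrupt */
	mwait_idle_with_hints(0x10, MWAIT_ECX_INTERRUPT_BREAK);
}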

arch/x86/include/asm/processor.h

Lines changed: 0 additions & 23 deletions
@@ -700,29 +700,6 @@ static inline void sync_core(void)
 #endif
 }
 
-static inline void __monitor(const void *eax, unsigned long ecx,
-			     unsigned long edx)
-{
-	/* "monitor %eax, %ecx, %edx;" */
-	asm volatile(".byte 0x0f, 0x01, 0xc8;"
-		     :: "a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax, %ecx;" */
-	asm volatile(".byte 0x0f, 0x01, 0xc9;"
-		     :: "a" (eax), "c" (ecx));
-}
-
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
-{
-	trace_hardirqs_on();
-	/* "mwait %eax, %ecx;" */
-	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
-		     :: "a" (eax), "c" (ecx));
-}
-
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 extern void init_amd_e400_c1e_mask(void);

arch/x86/include/asm/timer.h

Lines changed: 18 additions & 59 deletions
@@ -4,6 +4,7 @@
 #include <linux/pm.h>
 #include <linux/percpu.h>
 #include <linux/interrupt.h>
+#include <linux/math64.h>
 
 #define TICK_SIZE (tick_nsec / 1000)
 
@@ -12,68 +13,26 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
+/*
+ * We use the full linear equation: f(x) = a + b*x, in order to allow
+ * a continuous function in the face of dynamic freq changes.
  *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
+ * Continuity means that when our frequency changes our slope (b); we want to
+ * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t.
  *
- *	And since SC is a constant power of two, we can convert the div
- *	into a shift.
+ * Without an offset (a) the above would not be possible.
  *
- *	We can use khz divisor instead of mhz to keep a better precision, since
- *	cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *	(mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- *
- * In:
- *
- * ns = cycles * cyc2ns_scale / SC
- *
- * Although we may still have enough bits to store the value of ns,
- * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
- * leading to an incorrect result.
- *
- * To avoid this, we can decompose 'cycles' into quotient and remainder
- * of division by SC. Then,
- *
- * ns = (quot * SC + rem) * cyc2ns_scale / SC
- *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
- *
- *			- sqazi@google.com
+ * See the comment near cycles_2_ns() for details on how we compute (b).
  */
-
-DECLARE_PER_CPU(unsigned long, cyc2ns);
-DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
-{
-	int cpu = smp_processor_id();
-	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
-			(1UL << CYC2NS_SCALE_FACTOR));
-	return ns;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	unsigned long long ns;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ns = __cycles_2_ns(cyc);
-	local_irq_restore(flags);
-
-	return ns;
-}
+struct cyc2ns_data {
+	u32 cyc2ns_mul;
+	u32 cyc2ns_shift;
+	u64 cyc2ns_offset;
+	u32 __count;
+	/* u32 hole */
+}; /* 24 bytes -- do not grow */
+
+extern struct cyc2ns_data *cyc2ns_read_begin(void);
+extern void cyc2ns_read_end(struct cyc2ns_data *);
 
 #endif /* _ASM_X86_TIMER_H */
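
The replacement comment describes f(x) = a + b*x; concretely, a reader of cyc2ns_data computes ns = cyc2ns_offset + (cyc * cyc2ns_mul) >> cyc2ns_shift, i.e. the slope (b) is encoded as a 32-bit multiplier plus shift. A hedged sketch of such a consumer, assuming the mul_u64_u32_shr() helper from the newly included linux/math64.h:

/* Hedged sketch of a cyc2ns_data consumer (not part of this diff). */
static inline u64 example_cycles_2_ns(u64 cyc)
{
	struct cyc2ns_data *data = cyc2ns_read_begin();	/* pins a consistent (a, b) pair */
	u64 ns;

	/* f(cyc) = a + b*cyc, with b = cyc2ns_mul / 2^cyc2ns_shift */
	ns = data->cyc2ns_offset +
	     mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);

	cyc2ns_read_end(data);	/* drops the reference taken by read_begin */
	return ns;
}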

arch/x86/kernel/acpi/cstate.c

Lines changed: 0 additions & 23 deletions
@@ -150,29 +150,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();

arch/x86/kernel/cpu/amd.c

Lines changed: 1 addition & 1 deletion
@@ -487,7 +487,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 		if (!check_tsc_unstable())
-			sched_clock_stable = 1;
+			set_sched_clock_stable();
 	}
 
 #ifdef CONFIG_X86_64

arch/x86/kernel/cpu/intel.c

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 		if (!check_tsc_unstable())
-			sched_clock_stable = 1;
+			set_sched_clock_stable();
 	}
 
 	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */

arch/x86/kernel/cpu/perf_event.c

Lines changed: 11 additions & 5 deletions
@@ -1883,21 +1883,27 @@ static struct pmu pmu = {
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
+	struct cyc2ns_data *data;
+
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
 	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!sched_clock_stable)
+	if (!sched_clock_stable())
 		return;
 
+	data = cyc2ns_read_begin();
+
 	userpg->cap_user_time = 1;
-	userpg->time_mult = this_cpu_read(cyc2ns);
-	userpg->time_shift = CYC2NS_SCALE_FACTOR;
-	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+	userpg->time_mult = data->cyc2ns_mul;
+	userpg->time_shift = data->cyc2ns_shift;
+	userpg->time_offset = data->cyc2ns_offset - now;
 
 	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = this_cpu_read(cyc2ns_offset);
+	userpg->time_zero = data->cyc2ns_offset;
+
+	cyc2ns_read_end(data);
 }
 
 /*
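
With cap_user_time_zero set, userspace can turn a raw TSC reading into perf time entirely from the mmap'ed event page. A hedged sketch of that consumer-side arithmetic, following the scheme documented in include/uapi/linux/perf_event.h (the quotient/remainder split keeps the 64-bit multiply from overflowing):

/* Hypothetical userspace helper; field values come from perf_event_mmap_page. */
#include <stdint.h>

static uint64_t tsc_to_perf_time(uint64_t cyc, uint64_t time_zero,
				 uint32_t time_mult, uint16_t time_shift)
{
	uint64_t quot = cyc >> time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

	/* time = zero + cyc * (mult / 2^shift), split to preserve precision */
	return time_zero + quot * time_mult + ((rem * time_mult) >> time_shift);
}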

arch/x86/kernel/smpboot.c

Lines changed: 2 additions & 0 deletions
@@ -1417,7 +1417,9 @@ static inline void mwait_play_dead(void)
 		 * The WBINVD is insufficient due to the spurious-wakeup
 		 * case where we return around the loop.
 		 */
+		mb();
 		clflush(mwait_ptr);
+		mb();
 		__monitor(mwait_ptr, 0, 0);
 		mb();
 		__mwait(eax, 0);
