Skip to content

Commit dcbf77b

Browse files
committed
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
  sched: Fix SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL vs SD_WAKE_AFFINE
  sched: Stop buddies from hogging the system
  sched: Add new wakeup preemption mode: WAKEUP_RUNNING
  sched: Fix TASK_WAKING & loadaverage breakage
  sched: Disable wakeup balancing
  sched: Rename flags to wake_flags
  sched: Clean up the load_idx selection in select_task_rq_fair
  sched: Optimize cgroup vs wakeup a bit
  sched: x86: Name old_perf in a unique way
  sched: Implement a gentler fair-sleepers feature
  sched: Add SD_PREFER_LOCAL
  sched: Add a few SYNC hint knobs to play with
  sched: Fix sync wakeups again
  sched: Add WF_FORK
  sched: Rename sync arguments
  sched: Rename select_task_rq() argument
  sched: Feature to disable APERF/MPERF cpu_power
  x86: sched: Provide arch implementations using aperf/mperf
  x86: Add generic aperf/mperf code
  x86: Move APERF/MPERF into a X86_FEATURE
  ...

Fix up trivial conflict in arch/x86/include/asm/processor.h due to nearby addition of amd_get_nb_id() declaration from the EDAC merge.
2 parents ca043a6 + 29cd8ba commit dcbf77b

File tree

21 files changed

+688
-603
lines changed

21 files changed

+688
-603
lines changed

arch/ia64/include/asm/topology.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,13 @@ void build_cpu_to_node_map(void);
6161
.cache_nice_tries = 2, \
6262
.busy_idx = 2, \
6363
.idle_idx = 1, \
64-
.newidle_idx = 2, \
65-
.wake_idx = 1, \
66-
.forkexec_idx = 1, \
64+
.newidle_idx = 0, \
65+
.wake_idx = 0, \
66+
.forkexec_idx = 0, \
6767
.flags = SD_LOAD_BALANCE \
6868
| SD_BALANCE_NEWIDLE \
6969
| SD_BALANCE_EXEC \
70+
| SD_BALANCE_FORK \
7071
| SD_WAKE_AFFINE, \
7172
.last_balance = jiffies, \
7273
.balance_interval = 1, \
@@ -85,14 +86,14 @@ void build_cpu_to_node_map(void);
8586
.cache_nice_tries = 2, \
8687
.busy_idx = 3, \
8788
.idle_idx = 2, \
88-
.newidle_idx = 2, \
89-
.wake_idx = 1, \
90-
.forkexec_idx = 1, \
89+
.newidle_idx = 0, \
90+
.wake_idx = 0, \
91+
.forkexec_idx = 0, \
9192
.flags = SD_LOAD_BALANCE \
93+
| SD_BALANCE_NEWIDLE \
9294
| SD_BALANCE_EXEC \
9395
| SD_BALANCE_FORK \
94-
| SD_SERIALIZE \
95-
| SD_WAKE_BALANCE, \
96+
| SD_SERIALIZE, \
9697
.last_balance = jiffies, \
9798
.balance_interval = 64, \
9899
.nr_balance_failed = 0, \

arch/mips/include/asm/mach-ip27/topology.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
4848
.cache_nice_tries = 1, \
4949
.flags = SD_LOAD_BALANCE \
5050
| SD_BALANCE_EXEC \
51-
| SD_WAKE_BALANCE, \
5251
.last_balance = jiffies, \
5352
.balance_interval = 1, \
5453
.nr_balance_failed = 0, \

arch/powerpc/include/asm/topology.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,13 @@ static inline int pcibus_to_node(struct pci_bus *bus)
5757
.cache_nice_tries = 1, \
5858
.busy_idx = 3, \
5959
.idle_idx = 1, \
60-
.newidle_idx = 2, \
61-
.wake_idx = 1, \
60+
.newidle_idx = 0, \
61+
.wake_idx = 0, \
6262
.flags = SD_LOAD_BALANCE \
6363
| SD_BALANCE_EXEC \
64+
| SD_BALANCE_FORK \
6465
| SD_BALANCE_NEWIDLE \
65-
| SD_WAKE_IDLE \
66-
| SD_SERIALIZE \
67-
| SD_WAKE_BALANCE, \
66+
| SD_SERIALIZE, \
6867
.last_balance = jiffies, \
6968
.balance_interval = 1, \
7069
.nr_balance_failed = 0, \

arch/sh/include/asm/topology.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
.cache_nice_tries = 2, \
1616
.busy_idx = 3, \
1717
.idle_idx = 2, \
18-
.newidle_idx = 2, \
19-
.wake_idx = 1, \
20-
.forkexec_idx = 1, \
18+
.newidle_idx = 0, \
19+
.wake_idx = 0, \
20+
.forkexec_idx = 0, \
2121
.flags = SD_LOAD_BALANCE \
2222
| SD_BALANCE_FORK \
2323
| SD_BALANCE_EXEC \
24-
| SD_SERIALIZE \
25-
| SD_WAKE_BALANCE, \
24+
| SD_BALANCE_NEWIDLE \
25+
| SD_SERIALIZE, \
2626
.last_balance = jiffies, \
2727
.balance_interval = 1, \
2828
.nr_balance_failed = 0, \

arch/sparc/include/asm/topology_64.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
5252
.busy_idx = 3, \
5353
.idle_idx = 2, \
5454
.newidle_idx = 0, \
55-
.wake_idx = 1, \
56-
.forkexec_idx = 1, \
55+
.wake_idx = 0, \
56+
.forkexec_idx = 0, \
5757
.flags = SD_LOAD_BALANCE \
5858
| SD_BALANCE_FORK \
5959
| SD_BALANCE_EXEC \
60-
| SD_SERIALIZE \
61-
| SD_WAKE_BALANCE, \
60+
| SD_SERIALIZE, \
6261
.last_balance = jiffies, \
6362
.balance_interval = 1, \
6463
}

arch/x86/include/asm/cpufeature.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
9797
#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
9898
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
99+
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
99100

100101
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
101102
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */

arch/x86/include/asm/processor.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct mm_struct;
2727
#include <linux/cpumask.h>
2828
#include <linux/cache.h>
2929
#include <linux/threads.h>
30+
#include <linux/math64.h>
3031
#include <linux/init.h>
3132

3233
/*
@@ -1022,4 +1023,33 @@ extern int set_tsc_mode(unsigned int val);
10221023

10231024
extern int amd_get_nb_id(int cpu);
10241025

1026+
struct aperfmperf {
1027+
u64 aperf, mperf;
1028+
};
1029+
1030+
static inline void get_aperfmperf(struct aperfmperf *am)
1031+
{
1032+
WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
1033+
1034+
rdmsrl(MSR_IA32_APERF, am->aperf);
1035+
rdmsrl(MSR_IA32_MPERF, am->mperf);
1036+
}
1037+
1038+
#define APERFMPERF_SHIFT 10
1039+
1040+
static inline
1041+
unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
1042+
struct aperfmperf *new)
1043+
{
1044+
u64 aperf = new->aperf - old->aperf;
1045+
u64 mperf = new->mperf - old->mperf;
1046+
unsigned long ratio = aperf;
1047+
1048+
mperf >>= APERFMPERF_SHIFT;
1049+
if (mperf)
1050+
ratio = div64_u64(aperf, mperf);
1051+
1052+
return ratio;
1053+
}
1054+
10251055
#endif /* _ASM_X86_PROCESSOR_H */

arch/x86/include/asm/topology.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,11 @@ extern unsigned long node_remap_size[];
116116

117117
# define SD_CACHE_NICE_TRIES 1
118118
# define SD_IDLE_IDX 1
119-
# define SD_NEWIDLE_IDX 2
120-
# define SD_FORKEXEC_IDX 0
121119

122120
#else
123121

124122
# define SD_CACHE_NICE_TRIES 2
125123
# define SD_IDLE_IDX 2
126-
# define SD_NEWIDLE_IDX 2
127-
# define SD_FORKEXEC_IDX 1
128124

129125
#endif
130126

@@ -137,22 +133,20 @@ extern unsigned long node_remap_size[];
137133
.cache_nice_tries = SD_CACHE_NICE_TRIES, \
138134
.busy_idx = 3, \
139135
.idle_idx = SD_IDLE_IDX, \
140-
.newidle_idx = SD_NEWIDLE_IDX, \
141-
.wake_idx = 1, \
142-
.forkexec_idx = SD_FORKEXEC_IDX, \
136+
.newidle_idx = 0, \
137+
.wake_idx = 0, \
138+
.forkexec_idx = 0, \
143139
\
144140
.flags = 1*SD_LOAD_BALANCE \
145141
| 1*SD_BALANCE_NEWIDLE \
146142
| 1*SD_BALANCE_EXEC \
147143
| 1*SD_BALANCE_FORK \
148-
| 0*SD_WAKE_IDLE \
144+
| 0*SD_BALANCE_WAKE \
149145
| 1*SD_WAKE_AFFINE \
150-
| 1*SD_WAKE_BALANCE \
151146
| 0*SD_SHARE_CPUPOWER \
152147
| 0*SD_POWERSAVINGS_BALANCE \
153148
| 0*SD_SHARE_PKG_RESOURCES \
154149
| 1*SD_SERIALIZE \
155-
| 1*SD_WAKE_IDLE_FAR \
156150
| 0*SD_PREFER_SIBLING \
157151
, \
158152
.last_balance = jiffies, \

arch/x86/kernel/cpu/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)
1313

1414
obj-y := intel_cacheinfo.o addon_cpuid_features.o
1515
obj-y += proc.o capflags.o powerflags.o common.o
16-
obj-y += vmware.o hypervisor.o
16+
obj-y += vmware.o hypervisor.o sched.o
1717

1818
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
1919
obj-$(CONFIG_X86_64) += bugs_64.o

arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c

Lines changed: 11 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ enum {
6060
};
6161

6262
#define INTEL_MSR_RANGE (0xffff)
63-
#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
6463

6564
struct acpi_cpufreq_data {
6665
struct acpi_processor_performance *acpi_data;
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data {
7170

7271
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
7372

74-
struct acpi_msr_data {
75-
u64 saved_aperf, saved_mperf;
76-
};
77-
78-
static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
73+
static DEFINE_PER_CPU(struct aperfmperf, old_perf);
7974

8075
DEFINE_TRACE(power_mark);
8176

@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask)
244239
return cmd.val;
245240
}
246241

247-
struct perf_pair {
248-
union {
249-
struct {
250-
u32 lo;
251-
u32 hi;
252-
} split;
253-
u64 whole;
254-
} aperf, mperf;
255-
};
256-
257242
/* Called via smp_call_function_single(), on the target CPU */
258243
static void read_measured_perf_ctrs(void *_cur)
259244
{
260-
struct perf_pair *cur = _cur;
245+
struct aperfmperf *am = _cur;
261246

262-
rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
263-
rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
247+
get_aperfmperf(am);
264248
}
265249

266250
/*
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur)
279263
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
280264
unsigned int cpu)
281265
{
282-
struct perf_pair readin, cur;
283-
unsigned int perf_percent;
266+
struct aperfmperf perf;
267+
unsigned long ratio;
284268
unsigned int retval;
285269

286-
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
270+
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
287271
return 0;
288272

289-
cur.aperf.whole = readin.aperf.whole -
290-
per_cpu(msr_data, cpu).saved_aperf;
291-
cur.mperf.whole = readin.mperf.whole -
292-
per_cpu(msr_data, cpu).saved_mperf;
293-
per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
294-
per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
295-
296-
#ifdef __i386__
297-
/*
298-
* We dont want to do 64 bit divide with 32 bit kernel
299-
* Get an approximate value. Return failure in case we cannot get
300-
* an approximate value.
301-
*/
302-
if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
303-
int shift_count;
304-
u32 h;
305-
306-
h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
307-
shift_count = fls(h);
308-
309-
cur.aperf.whole >>= shift_count;
310-
cur.mperf.whole >>= shift_count;
311-
}
312-
313-
if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
314-
int shift_count = 7;
315-
cur.aperf.split.lo >>= shift_count;
316-
cur.mperf.split.lo >>= shift_count;
317-
}
318-
319-
if (cur.aperf.split.lo && cur.mperf.split.lo)
320-
perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
321-
else
322-
perf_percent = 0;
273+
ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
274+
per_cpu(old_perf, cpu) = perf;
323275

324-
#else
325-
if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
326-
int shift_count = 7;
327-
cur.aperf.whole >>= shift_count;
328-
cur.mperf.whole >>= shift_count;
329-
}
330-
331-
if (cur.aperf.whole && cur.mperf.whole)
332-
perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
333-
else
334-
perf_percent = 0;
335-
336-
#endif
337-
338-
retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
276+
retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
339277

340278
return retval;
341279
}
@@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
731669
acpi_processor_notify_smm(THIS_MODULE);
732670

733671
/* Check for APERF/MPERF support in hardware */
734-
if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
735-
unsigned int ecx;
736-
ecx = cpuid_ecx(6);
737-
if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
738-
acpi_cpufreq_driver.getavg = get_measured_perf;
739-
}
672+
if (cpu_has(c, X86_FEATURE_APERFMPERF))
673+
acpi_cpufreq_driver.getavg = get_measured_perf;
740674

741675
dprintk("CPU%u - ACPI performance management activated.\n", cpu);
742676
for (i = 0; i < perf->state_count; i++)

arch/x86/kernel/cpu/intel.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
350350
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
351351
}
352352

353+
if (c->cpuid_level > 6) {
354+
unsigned ecx = cpuid_ecx(6);
355+
if (ecx & 0x01)
356+
set_cpu_cap(c, X86_FEATURE_APERFMPERF);
357+
}
358+
353359
if (cpu_has_xmm2)
354360
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
355361
if (cpu_has_ds) {

0 commit comments

Comments
 (0)