Commit 5622f29

Authored by Markus Metzger (markus-metzger), committed by Ingo Molnar
x86, perf_counter, bts: Optimize BTS overflow handling
Draining the BTS buffer on a buffer overflow interrupt takes too long,
resulting in a kernel lockup when tracing the kernel.

Restructure perf_counter sampling into sample creation and sample output.

Prepare a single reference sample for BTS sampling and update the from
and to address fields when draining the BTS buffer. Drain the entire BTS
buffer between a single perf_output_begin() / perf_output_end() pair.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090915130023.A16204@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
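Condensed, the restructured drain path in intel_pmu_drain_bts_buffer() (full diff below) prepares one reference sample and then emits it once per BTS record inside a single output section. This fragment is lifted from the new code and is not compilable on its own:

```c
/* Fill in the invariant sample fields (pid, time, cpu, ...) just once. */
perf_prepare_sample(&header, &data, counter, &regs);

/* Reserve room for every pending record in a single call ... */
if (perf_output_begin(&handle, counter,
                      header.size * (top - at), 1, 1))
        return;

/*
 * ... then emit one sample per record, rewriting only the fields
 * that vary between records: the branch source and destination.
 */
for (; at < top; at++) {
        data.ip   = at->from;
        data.addr = at->to;
        perf_output_sample(&handle, &header, &data, counter);
}

perf_output_end(&handle);
```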
1 parent 4b77a72 · commit 5622f29

3 files changed (+266 −174 lines)

arch/x86/kernel/cpu/perf_counter.c — 37 additions, 23 deletions
```diff
@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
 #define BTS_RECORD_SIZE		24
 
 /* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 1024)
+#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
 
 /* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 64)
+#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
 
 
 /*
```
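With BTS_RECORD_SIZE at 24 bytes, the per-cpu buffer doubles from 24 KiB (1024 records) to 48 KiB (2048 records), and the overflow threshold moves from 64 records (1.5 KiB) to 128 records (3 KiB) before the end of the buffer — roughly doubling the number of records each overflow interrupt drains in one pass.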
```diff
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
-				       struct perf_sample_data *data)
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
 	struct bts_record {
@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 		u64	flags;
 	};
 	struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
-	unsigned long orig_ip = data->regs->ip;
 	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
 
 	if (!counter)
 		return;
@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 	top = (struct bts_record *)(unsigned long)ds->bts_index;
 
+	if (top <= at)
+		return;
+
 	ds->bts_index = ds->bts_buffer_base;
 
+
+	data.period	= counter->hw.last_period;
+	data.addr	= 0;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, counter, &regs);
+
+	if (perf_output_begin(&handle, counter,
+			      header.size * (top - at), 1, 1))
+		return;
+
 	for (; at < top; at++) {
-		data->regs->ip	= at->from;
-		data->addr	= at->to;
+		data.ip		= at->from;
+		data.addr	= at->to;
 
-		perf_counter_output(counter, 1, data);
+		perf_output_sample(&handle, &header, &data, counter);
 	}
 
-	data->regs->ip	= orig_ip;
-	data->addr	= 0;
+	perf_output_end(&handle);
 
 	/* There's new data available. */
+	counter->hw.interrupts++;
 	counter->pending_kill = POLL_IN;
 }
```
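Draining the whole buffer inside one perf_output_begin()/perf_output_end() pair means the ring-buffer reservation and wakeup machinery runs once per drain rather than once per record; together with preparing the invariant sample fields only once, this is presumably what brings the overflow-interrupt handling back under the budget that previously caused the lockup.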

```diff
@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	x86_perf_counter_update(counter, hwc, idx);
 
 	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		struct perf_sample_data data;
-		struct pt_regs regs;
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
+		intel_pmu_drain_bts_buffer(cpuc);
 
-		data.regs = &regs;
-		intel_pmu_drain_bts_buffer(cpuc, &data);
-	}
 	cpuc->counters[idx] = NULL;
 	clear_bit(idx, cpuc->used_mask);
 
```
```diff
@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			p6_pmu_disable_counter(hwc, idx);
 	}
 
```
```diff
@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	int bit, loops;
 	u64 ack, status;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc, &data);
+	intel_pmu_drain_bts_buffer(cpuc);
 	status = intel_pmu_get_status();
 	if (!status) {
 		perf_enable();
@@ -1702,7 +1717,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 		data.period = counter->hw.last_period;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
```
```diff
@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
```
include/linux/perf_counter.h — 64 additions, 4 deletions
```diff
@@ -691,6 +691,17 @@ struct perf_cpu_context {
 	int			recursion[4];
 };
 
+struct perf_output_handle {
+	struct perf_counter	*counter;
+	struct perf_mmap_data	*data;
+	unsigned long		head;
+	unsigned long		offset;
+	int			nmi;
+	int			sample;
+	int			locked;
+	unsigned long		flags;
+};
+
 #ifdef CONFIG_PERF_COUNTERS
 
 /*
```
```diff
@@ -722,16 +733,38 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs		*regs;
+	u64			type;
+
+	u64			ip;
+	struct {
+		u32	pid;
+		u32	tid;
+	}			tid_entry;
+	u64			time;
 	u64			addr;
+	u64			id;
+	u64			stream_id;
+	struct {
+		u32	cpu;
+		u32	reserved;
+	}			cpu_entry;
 	u64			period;
+	struct perf_callchain_entry *callchain;
 	struct perf_raw_record	*raw;
 };
 
+extern void perf_output_sample(struct perf_output_handle *handle,
+			       struct perf_event_header *header,
+			       struct perf_sample_data *data,
+			       struct perf_counter *counter);
+extern void perf_prepare_sample(struct perf_event_header *header,
+				struct perf_sample_data *data,
+				struct perf_counter *counter,
+				struct pt_regs *regs);
+
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
-				 struct perf_sample_data *data);
-extern void perf_counter_output(struct perf_counter *counter, int nmi,
-				struct perf_sample_data *data);
+				 struct perf_sample_data *data,
+				 struct pt_regs *regs);
 
 /*
  * Return 1 for a software counter, 0 for a hardware counter
```
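With this change, perf_sample_data caches the decoded sample contents (type, ip, tid_entry, time, id, cpu_entry, callchain, ...) instead of pointing at a pt_regs. A sample can therefore be prepared once by perf_prepare_sample() and emitted repeatedly by perf_output_sample() with only the varying fields rewritten — exactly what the BTS drain loop does with data.ip and data.addr. Callers that still have live registers now pass them explicitly via the extra pt_regs argument to perf_counter_overflow().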
```diff
@@ -781,6 +814,12 @@ extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
 #define perf_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
 
+extern int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_counter *counter, unsigned int size,
+			     int nmi, int sample);
+extern void perf_output_end(struct perf_output_handle *handle);
+extern void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len);
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)	{ }
```
```diff
@@ -807,7 +846,28 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
+
+static inline int
+perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c,
+		  unsigned int size, int nmi, int sample)		{ }
+static inline void perf_output_end(struct perf_output_handle *handle)	{ }
+static inline void
+perf_output_copy(struct perf_output_handle *handle,
+		 const void *buf, unsigned int len)			{ }
+static inline void
+perf_output_sample(struct perf_output_handle *handle,
+		   struct perf_event_header *header,
+		   struct perf_sample_data *data,
+		   struct perf_counter *counter)			{ }
+static inline void
+perf_prepare_sample(struct perf_event_header *header,
+		    struct perf_sample_data *data,
+		    struct perf_counter *counter,
+		    struct pt_regs *regs)				{ }
 #endif
 
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_PERF_COUNTER_H */
```
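The new perf_output_put() helper wraps perf_output_copy() for fixed-size values. As a sketch of its intended use — the output routines themselves live in kernel/perf_counter.c, presumably the third changed file, which this page does not show — a routine holding an open handle might emit fields like this (hypothetical fragment):

```c
/* Hypothetical fragment: 'handle' was opened by perf_output_begin()
 * and 'header'/'data' were filled in by perf_prepare_sample(). */
perf_output_put(&handle, header);   /* copies sizeof(header) bytes  */
perf_output_put(&handle, data.ip);  /* copies the 8-byte ip field   */
```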
