Skip to content

Commit be3f152

Browse files
melver (Marco Elver) authored and Peter Zijlstra committed
perf/hw_breakpoint: Optimize constant number of breakpoint slots
Optimize internal hw_breakpoint state if the architecture's number of breakpoint slots is constant. This avoids several kmalloc() calls and potentially unnecessary failures if the allocations fail, as well as subtly improves code generation and cache locality. The protocol is that if an architecture defines hw_breakpoint_slots via the preprocessor, it must be constant and the same for all types. Signed-off-by: Marco Elver <elver@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Dmitry Vyukov <dvyukov@google.com> Acked-by: Ian Rogers <irogers@google.com> Link: https://lore.kernel.org/r/20220829124719.675715-7-elver@google.com
1 parent db5f6f8 commit be3f152

File tree

3 files changed

+63
-41
lines changed

3 files changed

+63
-41
lines changed

arch/sh/include/asm/hw_breakpoint.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,7 @@ struct pmu;
4848
/* Maximum number of UBC channels */
4949
#define HBP_NUM 2
5050

51-
static inline int hw_breakpoint_slots(int type)
52-
{
53-
return HBP_NUM;
54-
}
51+
#define hw_breakpoint_slots(type) (HBP_NUM)
5552

5653
/* arch/sh/kernel/hw_breakpoint.c */
5754
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);

arch/x86/include/asm/hw_breakpoint.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
4444
/* Total number of available HW breakpoint registers */
4545
#define HBP_NUM 4
4646

47-
static inline int hw_breakpoint_slots(int type)
48-
{
49-
return HBP_NUM;
50-
}
47+
#define hw_breakpoint_slots(type) (HBP_NUM)
5148

5249
struct perf_event_attr;
5350
struct perf_event;

kernel/events/hw_breakpoint.c

Lines changed: 61 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,16 @@ struct bp_cpuinfo {
4040
/* Number of pinned cpu breakpoints in a cpu */
4141
unsigned int cpu_pinned;
4242
/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
43+
#ifdef hw_breakpoint_slots
44+
unsigned int tsk_pinned[hw_breakpoint_slots(0)];
45+
#else
4346
unsigned int *tsk_pinned;
47+
#endif
4448
/* Number of non-pinned cpu/task breakpoints in a cpu */
4549
unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
4650
};
4751

4852
static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
49-
static int nr_slots[TYPE_MAX] __ro_after_init;
5053

5154
static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
5255
{
@@ -73,6 +76,54 @@ struct bp_busy_slots {
7376
/* Serialize accesses to the above constraints */
7477
static DEFINE_MUTEX(nr_bp_mutex);
7578

79+
#ifdef hw_breakpoint_slots
80+
/*
81+
* Number of breakpoint slots is constant, and the same for all types.
82+
*/
83+
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
84+
static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); }
85+
static inline int init_breakpoint_slots(void) { return 0; }
86+
#else
87+
/*
88+
* Dynamic number of breakpoint slots.
89+
*/
90+
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
91+
92+
static inline int hw_breakpoint_slots_cached(int type)
93+
{
94+
return __nr_bp_slots[type];
95+
}
96+
97+
static __init int init_breakpoint_slots(void)
98+
{
99+
int i, cpu, err_cpu;
100+
101+
for (i = 0; i < TYPE_MAX; i++)
102+
__nr_bp_slots[i] = hw_breakpoint_slots(i);
103+
104+
for_each_possible_cpu(cpu) {
105+
for (i = 0; i < TYPE_MAX; i++) {
106+
struct bp_cpuinfo *info = get_bp_info(cpu, i);
107+
108+
info->tsk_pinned = kcalloc(__nr_bp_slots[i], sizeof(int), GFP_KERNEL);
109+
if (!info->tsk_pinned)
110+
goto err;
111+
}
112+
}
113+
114+
return 0;
115+
err:
116+
for_each_possible_cpu(err_cpu) {
117+
for (i = 0; i < TYPE_MAX; i++)
118+
kfree(get_bp_info(err_cpu, i)->tsk_pinned);
119+
if (err_cpu == cpu)
120+
break;
121+
}
122+
123+
return -ENOMEM;
124+
}
125+
#endif
126+
76127
__weak int hw_breakpoint_weight(struct perf_event *bp)
77128
{
78129
return 1;
@@ -95,7 +146,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
95146
unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
96147
int i;
97148

98-
for (i = nr_slots[type] - 1; i >= 0; i--) {
149+
for (i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
99150
if (tsk_pinned[i] > 0)
100151
return i + 1;
101152
}
@@ -312,7 +363,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
312363
fetch_this_slot(&slots, weight);
313364

314365
/* Flexible counters need to keep at least one slot */
315-
if (slots.pinned + (!!slots.flexible) > nr_slots[type])
366+
if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
316367
return -ENOSPC;
317368

318369
ret = arch_reserve_bp_slot(bp);
@@ -632,7 +683,7 @@ bool hw_breakpoint_is_used(void)
632683
if (info->cpu_pinned)
633684
return true;
634685

635-
for (int slot = 0; slot < nr_slots[type]; ++slot) {
686+
for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
636687
if (info->tsk_pinned[slot])
637688
return true;
638689
}
@@ -716,42 +767,19 @@ static struct pmu perf_breakpoint = {
716767

717768
int __init init_hw_breakpoint(void)
718769
{
719-
int cpu, err_cpu;
720-
int i, ret;
721-
722-
for (i = 0; i < TYPE_MAX; i++)
723-
nr_slots[i] = hw_breakpoint_slots(i);
724-
725-
for_each_possible_cpu(cpu) {
726-
for (i = 0; i < TYPE_MAX; i++) {
727-
struct bp_cpuinfo *info = get_bp_info(cpu, i);
728-
729-
info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
730-
GFP_KERNEL);
731-
if (!info->tsk_pinned) {
732-
ret = -ENOMEM;
733-
goto err;
734-
}
735-
}
736-
}
770+
int ret;
737771

738772
ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
739773
if (ret)
740-
goto err;
774+
return ret;
775+
776+
ret = init_breakpoint_slots();
777+
if (ret)
778+
return ret;
741779

742780
constraints_initialized = true;
743781

744782
perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
745783

746784
return register_die_notifier(&hw_breakpoint_exceptions_nb);
747-
748-
err:
749-
for_each_possible_cpu(err_cpu) {
750-
for (i = 0; i < TYPE_MAX; i++)
751-
kfree(get_bp_info(err_cpu, i)->tsk_pinned);
752-
if (err_cpu == cpu)
753-
break;
754-
}
755-
756-
return ret;
757785
}

0 commit comments

Comments (0)