
Commit c1961a4

Hugh Dickins authored and Ingo Molnar committed
x86/events/intel/ds: Map debug buffers in cpu_entry_area
The BTS and PEBS buffers both have their virtual addresses programmed into
the hardware. This means that any access to them is performed via the page
tables. The times that the hardware accesses these are entirely dependent
on how the performance monitoring hardware events are set up. In other
words, there is no way for the kernel to tell when the hardware might
access these buffers.

To avoid perf crashes, allocate the 'debug_store' buffers as whole pages
and map them into the cpu_entry_area.

The PEBS fixup buffer does not need this treatment.

[ tglx: Got rid of the kaiser_add_mapping() complication ]

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
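For orientation, the pattern the patch applies can be condensed from the
alloc_pebs_buffer() hunk below (the BTS path is analogous). Every helper
named here is introduced or used by this very diff, so this is a summary
sketch rather than additional API:

	/* Back the buffer with ordinary pages near the CPU's node... */
	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
	/* ...alias them at the fixed per-CPU cpu_entry_area address... */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
	/* ...and program the hardware with the alias, which stays mapped
	 * no matter which page tables are live when the PMU writes. */
	ds->pebs_buffer_base = (unsigned long)cea;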
1 parent 10043e0 commit c1961a4

2 files changed: +82 −45 lines changed

arch/x86/events/intel/ds.c

Lines changed: 80 additions & 45 deletions
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
@@ -280,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	size_t msz = 0;
+
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -301,99 +337,98 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
 
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)

arch/x86/events/perf_event.h

Lines changed: 2 additions & 0 deletions
@@ -199,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
