Skip to content

Commit 6e948c6

Browse files
author
Ingo Molnar
committed
Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf tooling fixes from Arnaldo Carvalho de Melo: "- Fix window dimensions change handling in 'perf top' (Jiri Olsa) - Fix 'perf record -c/-F' options for CPU event aliases (Andi Kleen) - Generate PERF_RECORD_{MMAP,COMM,EXEC} with 'perf record --delay' fixing symbol resolution for processes created, maps put in place while --delay happens (Arnaldo Carvalho de Melo) - Fix up leftover perf_evsel_stat usage via evsel->priv, plugging a SEGV when using event groups as in: $ perf stat -e '{cpu-clock,instructions}' workload - Fix 'perf script --per-event-dump' for auxtrace synth evsels (Arnaldo Carvalho de Melo) - Ignore kptr_restrict when not sampling the kernel (Arnaldo Carvalho de Melo) - Synchronize kernel ABI headers wrt SPDX tags and ABI changes, taking minimal action to handle new syscall args and silencing perf build warnings (Arnaldo Carvalho de Melo, Ingo Molnar) - Fix header.size for namespace events (Jiri Olsa) - Fix a bug during strstart() conversion in 'perf help' (Namhyung Kim) - Do not truncate instruction names at 6 chars in 'perf annotate', there are really long instruction names in PPC (Ravi Bangoria) - Fixup discontiguous/sparse numa nodes in 'perf bench numa' (Satheesh Rajendran) - Fix an exit code of trace__symbols_init in 'perf trace' (Andrei Vagin) - Fix 'perf test' entries on s/390 (Thomas Richter) - Bring instruction decoder files used by Intel PT into line with the kernel, silencing build warning (Adrian Hunter)" Signed-off-by: Ingo Molnar <mingo@kernel.org>
2 parents 4fc31ba + 1b3b521 commit 6e948c6

File tree

33 files changed

+593
-329
lines changed

33 files changed

+593
-329
lines changed

kernel/events/core.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6640,6 +6640,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
66406640
struct perf_namespaces_event *namespaces_event = data;
66416641
struct perf_output_handle handle;
66426642
struct perf_sample_data sample;
6643+
u16 header_size = namespaces_event->event_id.header.size;
66436644
int ret;
66446645

66456646
if (!perf_event_namespaces_match(event))
@@ -6650,7 +6651,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
66506651
ret = perf_output_begin(&handle, event,
66516652
namespaces_event->event_id.header.size);
66526653
if (ret)
6653-
return;
6654+
goto out;
66546655

66556656
namespaces_event->event_id.pid = perf_event_pid(event,
66566657
namespaces_event->task);
@@ -6662,6 +6663,8 @@ static void perf_event_namespaces_output(struct perf_event *event,
66626663
perf_event__output_id_sample(event, &handle, &sample);
66636664

66646665
perf_output_end(&handle);
6666+
out:
6667+
namespaces_event->event_id.header.size = header_size;
66656668
}
66666669

66676670
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,

tools/arch/arm/include/uapi/asm/kvm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ struct kvm_arch_memory_slot {
152152
(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
153153
#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
154154

155+
/* PL1 Physical Timer Registers */
156+
#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1)
157+
#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14)
158+
#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14)
159+
160+
/* Virtual Timer Registers */
155161
#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
156162
#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
157163
#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
@@ -216,6 +222,7 @@ struct kvm_arch_memory_slot {
216222
#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
217223
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
218224
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
225+
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
219226

220227
/* KVM_IRQ_LINE irq field index values */
221228
#define KVM_ARM_IRQ_TYPE_SHIFT 24

tools/arch/arm64/include/uapi/asm/kvm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,12 @@ struct kvm_arch_memory_slot {
196196

197197
#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
198198

199+
/* Physical Timer EL0 Registers */
200+
#define KVM_REG_ARM_PTIMER_CTL ARM64_SYS_REG(3, 3, 14, 2, 1)
201+
#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2)
202+
#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1)
203+
204+
/* EL0 Virtual Timer Registers */
199205
#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
200206
#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
201207
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
@@ -228,6 +234,7 @@ struct kvm_arch_memory_slot {
228234
#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
229235
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
230236
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
237+
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
231238

232239
/* Device Control API on vcpu fd */
233240
#define KVM_ARM_VCPU_PMU_V3_CTRL 0

tools/arch/x86/include/asm/cpufeatures.h

Lines changed: 274 additions & 263 deletions
Large diffs are not rendered by default.

tools/arch/x86/include/asm/disabled-features.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
1717
#endif
1818

19+
#ifdef CONFIG_X86_INTEL_UMIP
20+
# define DISABLE_UMIP 0
21+
#else
22+
# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
23+
#endif
24+
1925
#ifdef CONFIG_X86_64
2026
# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
2127
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
@@ -63,7 +69,7 @@
6369
#define DISABLED_MASK13 0
6470
#define DISABLED_MASK14 0
6571
#define DISABLED_MASK15 0
66-
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
72+
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
6773
#define DISABLED_MASK17 0
6874
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
6975

tools/include/uapi/asm-generic/mman.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
1414
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
1515
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
16+
#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */
1617

1718
/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
1819

tools/include/uapi/drm/drm.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,28 @@ struct drm_syncobj_array {
737737
__u32 pad;
738738
};
739739

740+
/* Query current scanout sequence number */
741+
/*
 * Used as the argument type of DRM_IOCTL_CRTC_GET_SEQUENCE.
 * Fields marked "return:" are outputs; crtc_id is the request input.
 */
struct drm_crtc_get_sequence {
742+
__u32 crtc_id; /* requested crtc_id */
743+
__u32 active; /* return: crtc output is active */
744+
__u64 sequence; /* return: most recent vblank sequence */
745+
__s64 sequence_ns; /* return: most recent time of first pixel out */
746+
};
747+
748+
/* Queue event to be delivered at specified sequence. Time stamp marks
749+
* when the first pixel of the refresh cycle leaves the display engine
750+
* for the display
751+
*/
752+
#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */
753+
#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */
754+
755+
/*
 * Used as the argument type of DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
 */
struct drm_crtc_queue_sequence {
756+
__u32 crtc_id; /* presumably the crtc to queue the event on - confirm against the driver */
757+
__u32 flags; /* presumably a mask of DRM_CRTC_SEQUENCE_* values - confirm */
758+
__u64 sequence; /* on input, target sequence. on output, actual sequence */
759+
__u64 user_data; /* user data passed to event */
760+
};
761+
740762
#if defined(__cplusplus)
741763
}
742764
#endif
@@ -819,6 +841,9 @@ extern "C" {
819841

820842
#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank)
821843

844+
#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
845+
#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
846+
822847
#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw)
823848

824849
#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res)
@@ -863,6 +888,11 @@ extern "C" {
863888
#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
864889
#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
865890

891+
#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease)
892+
#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees)
893+
#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease)
894+
#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
895+
866896
/**
867897
* Device specific ioctls should only be in their respective headers
868898
* The device specific ioctl range is from 0x40 to 0x9f.
@@ -893,6 +923,7 @@ struct drm_event {
893923

894924
#define DRM_EVENT_VBLANK 0x01
895925
#define DRM_EVENT_FLIP_COMPLETE 0x02
926+
#define DRM_EVENT_CRTC_SEQUENCE 0x03
896927

897928
struct drm_event_vblank {
898929
struct drm_event base;
@@ -903,6 +934,16 @@ struct drm_event_vblank {
903934
__u32 crtc_id; /* 0 on older kernels that do not support this */
904935
};
905936

937+
/* Event delivered at sequence. Time stamp marks when the first pixel
938+
* of the refresh cycle leaves the display engine for the display
939+
*/
940+
/*
 * Payload layout: a struct drm_event header followed by three 64-bit fields.
 * NOTE(review): presumably sent with base.type == DRM_EVENT_CRTC_SEQUENCE - confirm.
 */
struct drm_event_crtc_sequence {
941+
struct drm_event base; /* embedded struct drm_event, first member */
942+
__u64 user_data; /* presumably the user_data given in struct drm_crtc_queue_sequence - confirm */
943+
__s64 time_ns; /* time stamp of the event; unit presumably nanoseconds - confirm */
944+
__u64 sequence; /* presumably the sequence at which the event was delivered - confirm */
945+
};
946+
906947
/* typedef area */
907948
#ifndef __KERNEL__
908949
typedef struct drm_clip_rect drm_clip_rect_t;

tools/include/uapi/drm/i915_drm.h

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait {
397397
#define I915_PARAM_MIN_EU_IN_POOL 39
398398
#define I915_PARAM_MMAP_GTT_VERSION 40
399399

400-
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
400+
/*
401+
* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
401402
* priorities and the driver will attempt to execute batches in priority order.
403+
* The param returns a capability bitmask, nonzero implies that the scheduler
404+
* is enabled, with different features present according to the mask.
405+
*
406+
* The initial priority for each batch is supplied by the context and is
407+
* controlled via I915_CONTEXT_PARAM_PRIORITY.
402408
*/
403409
#define I915_PARAM_HAS_SCHEDULER 41
410+
#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
411+
#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
412+
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
413+
404414
#define I915_PARAM_HUC_STATUS 42
405415

406416
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -1309,14 +1319,16 @@ struct drm_i915_reg_read {
13091319
* be specified
13101320
*/
13111321
__u64 offset;
1322+
#define I915_REG_READ_8B_WA (1ul << 0)
1323+
13121324
__u64 val; /* Return value */
13131325
};
13141326
/* Known registers:
13151327
*
13161328
* Render engine timestamp - 0x2358 + 64bit - gen7+
13171329
* - Note this register returns an invalid value if using the default
1318-
* single instruction 8byte read, in order to workaround that use
1319-
* offset (0x2538 | 1) instead.
1330+
* single instruction 8byte read, in order to work around that pass
1331+
* flag I915_REG_READ_8B_WA in offset field.
13201332
*
13211333
*/
13221334

@@ -1359,6 +1371,10 @@ struct drm_i915_gem_context_param {
13591371
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
13601372
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
13611373
#define I915_CONTEXT_PARAM_BANNABLE 0x5
1374+
#define I915_CONTEXT_PARAM_PRIORITY 0x6
1375+
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
1376+
#define I915_CONTEXT_DEFAULT_PRIORITY 0
1377+
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
13621378
__u64 value;
13631379
};
13641380

@@ -1510,9 +1526,14 @@ struct drm_i915_perf_oa_config {
15101526
__u32 n_boolean_regs;
15111527
__u32 n_flex_regs;
15121528

1513-
__u64 __user mux_regs_ptr;
1514-
__u64 __user boolean_regs_ptr;
1515-
__u64 __user flex_regs_ptr;
1529+
/*
1530+
* These fields are pointers to tuples of u32 values (register
1531+
* address, value). For example the expected length of the buffer
1532+
* pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
1533+
*/
1534+
__u64 mux_regs_ptr;
1535+
__u64 boolean_regs_ptr;
1536+
__u64 flex_regs_ptr;
15161537
};
15171538

15181539
#if defined(__cplusplus)

tools/include/uapi/linux/kcmp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
12
#ifndef _UAPI_LINUX_KCMP_H
23
#define _UAPI_LINUX_KCMP_H
34

tools/include/uapi/linux/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ struct kvm_ppc_resize_hpt {
931931
#define KVM_CAP_PPC_SMT_POSSIBLE 147
932932
#define KVM_CAP_HYPERV_SYNIC2 148
933933
#define KVM_CAP_HYPERV_VP_INDEX 149
934+
#define KVM_CAP_S390_AIS_MIGRATION 150
934935

935936
#ifdef KVM_CAP_IRQ_ROUTING
936937

tools/include/uapi/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,7 @@ enum perf_callchain_context {
942942
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
943943
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
944944
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
945+
#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
945946

946947
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
947948
#define PERF_FLAG_FD_OUTPUT (1UL << 1)

tools/include/uapi/linux/prctl.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
12
#ifndef _LINUX_PRCTL_H
23
#define _LINUX_PRCTL_H
34

@@ -197,4 +198,13 @@ struct prctl_mm_map {
197198
# define PR_CAP_AMBIENT_LOWER 3
198199
# define PR_CAP_AMBIENT_CLEAR_ALL 4
199200

201+
/* arm64 Scalable Vector Extension controls */
202+
/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
203+
#define PR_SVE_SET_VL 50 /* set task vector length */
204+
# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
205+
#define PR_SVE_GET_VL 51 /* get task vector length */
206+
/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
207+
# define PR_SVE_VL_LEN_MASK 0xffff
208+
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
209+
200210
#endif /* _LINUX_PRCTL_H */

tools/perf/bench/numa.c

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,47 @@ static const char * const numa_usage[] = {
216216
NULL
217217
};
218218

219+
/*
220+
* To get number of numa nodes present.
221+
*/
222+
static int nr_numa_nodes(void)
223+
{
224+
int i, nr_nodes = 0;
225+
226+
for (i = 0; i < g->p.nr_nodes; i++) {
227+
if (numa_bitmask_isbitset(numa_nodes_ptr, i))
228+
nr_nodes++;
229+
}
230+
231+
return nr_nodes;
232+
}
233+
234+
/*
235+
* To check if given numa node is present.
236+
*/
237+
static int is_node_present(int node)
238+
{
239+
return numa_bitmask_isbitset(numa_nodes_ptr, node);
240+
}
241+
242+
/*
243+
* To check given numa node has cpus.
244+
*/
245+
static bool node_has_cpus(int node)
246+
{
247+
struct bitmask *cpu = numa_allocate_cpumask();
248+
unsigned int i;
249+
250+
if (cpu && !numa_node_to_cpus(node, cpu)) {
251+
for (i = 0; i < cpu->size; i++) {
252+
if (numa_bitmask_isbitset(cpu, i))
253+
return true;
254+
}
255+
}
256+
257+
return false; /* lets fall back to nocpus safely */
258+
}
259+
219260
static cpu_set_t bind_to_cpu(int target_cpu)
220261
{
221262
cpu_set_t orig_mask, mask;
@@ -244,12 +285,12 @@ static cpu_set_t bind_to_cpu(int target_cpu)
244285

245286
static cpu_set_t bind_to_node(int target_node)
246287
{
247-
int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
288+
int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
248289
cpu_set_t orig_mask, mask;
249290
int cpu;
250291
int ret;
251292

252-
BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
293+
BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
253294
BUG_ON(!cpus_per_node);
254295

255296
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
@@ -649,7 +690,7 @@ static int parse_setup_node_list(void)
649690
int i;
650691

651692
for (i = 0; i < mul; i++) {
652-
if (t >= g->p.nr_tasks) {
693+
if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
653694
printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
654695
goto out;
655696
}
@@ -964,6 +1005,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
9641005
sum = 0;
9651006

9661007
for (node = 0; node < g->p.nr_nodes; node++) {
1008+
if (!is_node_present(node))
1009+
continue;
9671010
nr = nodes[node];
9681011
nr_min = min(nr, nr_min);
9691012
nr_max = max(nr, nr_max);
@@ -984,8 +1027,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
9841027
process_groups = 0;
9851028

9861029
for (node = 0; node < g->p.nr_nodes; node++) {
987-
int processes = count_node_processes(node);
1030+
int processes;
9881031

1032+
if (!is_node_present(node))
1033+
continue;
1034+
processes = count_node_processes(node);
9891035
nr = nodes[node];
9901036
tprintf(" %2d/%-2d", nr, processes);
9911037

@@ -1291,7 +1337,7 @@ static void print_summary(void)
12911337

12921338
printf("\n ###\n");
12931339
printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
1294-
g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
1340+
g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
12951341
printf(" # %5dx %5ldMB global shared mem operations\n",
12961342
g->p.nr_loops, g->p.bytes_global/1024/1024);
12971343
printf(" # %5dx %5ldMB process shared mem operations\n",

0 commit comments

Comments
 (0)