490 lines
12 KiB
Text
490 lines
12 KiB
Text
|
perf.data format
|
||
|
|
||
|
Up to date as of v4.7
|
||
|
|
||
|
This document describes the on-disk perf.data format, generated by perf record
|
||
|
or perf inject and consumed by the other perf tools.
|
||
|
|
||
|
On a high level perf.data contains the events generated by the PMUs, plus metadata.
|
||
|
|
||
|
All fields are in the native endianness of the machine that generated the perf.data.
|
||
|
|
||
|
When perf is writing to a pipe it uses a special version of the file
|
||
|
format that does not rely on seeking to adjust data offsets. This
|
||
|
format is described in the "Pipe-mode data" section. The pipe data version can be
|
||
|
augmented with additional events using perf inject.
|
||
|
|
||
|
The file starts with a perf_header:
|
||
|
|
||
|
struct perf_header {
|
||
|
char magic[8]; /* PERFILE2 */
|
||
|
uint64_t size; /* size of the header */
|
||
|
uint64_t attr_size; /* size of an attribute in attrs */
|
||
|
struct perf_file_section attrs;
|
||
|
struct perf_file_section data;
|
||
|
struct perf_file_section event_types;
|
||
|
uint64_t flags;
|
||
|
uint64_t flags1[3];
|
||
|
};
|
||
|
|
||
|
The magic number identifies the perf file and the version. Current perf versions
|
||
|
use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
|
||
|
is not described here. The magic number also identifies the endian. When the
|
||
|
magic value is 64bit byte swapped compared to the expected value, the file is in non-native
|
||
|
endian.
|
||
|
|
||
|
A perf_file_section contains a pointer to another section of the perf file.
|
||
|
The header contains three such pointers: for attributes, data and event types.
|
||
|
|
||
|
struct perf_file_section {
|
||
|
uint64_t offset; /* offset from start of file */
|
||
|
uint64_t size; /* size of the section */
|
||
|
};
|
||
|
|
||
|
Flags section:
|
||
|
|
||
|
The header is followed by different optional headers, described by the bits set
|
||
|
in flags. Only headers for which the bit is set are included. Each header
|
||
|
consists of a perf_file_section located after the initial header.
|
||
|
The respective perf_file_section points to the data of the additional
|
||
|
header and defines its size.
|
||
|
|
||
|
Some headers consist of strings, which are defined like this:
|
||
|
|
||
|
struct perf_header_string {
|
||
|
uint32_t len;
|
||
|
char string[len]; /* zero terminated */
|
||
|
};
|
||
|
|
||
|
Some headers consist of a sequence of strings, which start with a count of the strings that follow:
|
||
|
|
||
|
struct perf_header_string_list {
|
||
|
uint32_t nr;
|
||
|
struct perf_header_string strings[nr]; /* variable length records */
|
||
|
};
|
||
|
|
||
|
The bits are the flags bits in a 256 bit bitmap starting with
|
||
|
flags. These define the valid bits:
|
||
|
|
||
|
HEADER_RESERVED = 0, /* always cleared */
|
||
|
HEADER_FIRST_FEATURE = 1,
|
||
|
HEADER_TRACING_DATA = 1,
|
||
|
|
||
|
Describe me.
|
||
|
|
||
|
HEADER_BUILD_ID = 2,
|
||
|
|
||
|
The header consists of a sequence of build_id_event. The size of each record
|
||
|
is defined by header.size (see perf_event.h). Each event defines an ELF build id
|
||
|
for an executable file name for a pid. An ELF build id is a unique identifier
|
||
|
assigned by the linker to an executable.
|
||
|
|
||
|
struct build_id_event {
|
||
|
struct perf_event_header header;
|
||
|
pid_t pid;
|
||
|
uint8_t build_id[24];
|
||
|
char filename[header.size - offsetof(struct build_id_event, filename)];
|
||
|
};
|
||
|
|
||
|
HEADER_HOSTNAME = 3,
|
||
|
|
||
|
A perf_header_string with the hostname where the data was collected
|
||
|
(uname -n)
|
||
|
|
||
|
HEADER_OSRELEASE = 4,
|
||
|
|
||
|
A perf_header_string with the os release where the data was collected
|
||
|
(uname -r)
|
||
|
|
||
|
HEADER_VERSION = 5,
|
||
|
|
||
|
A perf_header_string with the perf user tool version where the
|
||
|
data was collected. This is the same as the version of the source tree
|
||
|
the perf tool was built from.
|
||
|
|
||
|
HEADER_ARCH = 6,
|
||
|
|
||
|
A perf_header_string with the CPU architecture (uname -m)
|
||
|
|
||
|
HEADER_NRCPUS = 7,
|
||
|
|
||
|
A structure defining the number of CPUs.
|
||
|
|
||
|
struct nr_cpus {
|
||
|
uint32_t nr_cpus_online;
|
||
|
uint32_t nr_cpus_available; /* CPUs not yet onlined */
|
||
|
};
|
||
|
|
||
|
HEADER_CPUDESC = 8,
|
||
|
|
||
|
A perf_header_string with description of the CPU. On x86 this is the model name
|
||
|
in /proc/cpuinfo
|
||
|
|
||
|
HEADER_CPUID = 9,
|
||
|
|
||
|
A perf_header_string with the exact CPU type. On x86 this is
|
||
|
vendor,family,model,stepping. For example: GenuineIntel,6,69,1
|
||
|
|
||
|
HEADER_TOTAL_MEM = 10,
|
||
|
|
||
|
A uint64_t with the total memory in bytes.
|
||
|
|
||
|
HEADER_CMDLINE = 11,
|
||
|
|
||
|
A perf_header_string with the perf command line used to collect the data.
|
||
|
|
||
|
HEADER_EVENT_DESC = 12,
|
||
|
|
||
|
Another description of the perf_event_attrs, more detailed than header.attrs
|
||
|
including IDs and names. See perf_event.h or the man page for a description
|
||
|
of a struct perf_event_attr.
|
||
|
|
||
|
struct {
|
||
|
uint32_t nr; /* number of events */
|
||
|
uint32_t attr_size; /* size of each perf_event_attr */
|
||
|
struct {
|
||
|
struct perf_event_attr attr; /* size of attr_size */
|
||
|
uint32_t nr_ids;
|
||
|
struct perf_header_string event_string;
|
||
|
uint64_t ids[nr_ids];
|
||
|
} events[nr]; /* Variable length records */
|
||
|
};
|
||
|
|
||
|
HEADER_CPU_TOPOLOGY = 13,
|
||
|
|
||
|
String lists defining the core and CPU threads topology.
|
||
|
|
||
|
struct {
|
||
|
struct perf_header_string_list cores; /* Variable length */
|
||
|
struct perf_header_string_list threads; /* Variable length */
|
||
|
};
|
||
|
|
||
|
Example:
|
||
|
sibling cores : 0-3
|
||
|
sibling threads : 0-1
|
||
|
sibling threads : 2-3
|
||
|
|
||
|
HEADER_NUMA_TOPOLOGY = 14,
|
||
|
|
||
|
A list of NUMA node descriptions
|
||
|
|
||
|
struct {
|
||
|
uint32_t nr;
|
||
|
struct {
|
||
|
uint32_t nodenr;
|
||
|
uint64_t mem_total;
|
||
|
uint64_t mem_free;
|
||
|
struct perf_header_string cpus;
|
||
|
} nodes[nr]; /* Variable length records */
|
||
|
};
|
||
|
|
||
|
HEADER_BRANCH_STACK = 15,
|
||
|
|
||
|
Not implemented in perf.
|
||
|
|
||
|
HEADER_PMU_MAPPINGS = 16,
|
||
|
|
||
|
A list of PMU structures, defining the different PMUs supported by perf.
|
||
|
|
||
|
struct {
|
||
|
uint32_t nr;
|
||
|
struct pmu {
|
||
|
uint32_t pmu_type;
|
||
|
struct perf_header_string pmu_name;
|
||
|
} [nr]; /* Variable length records */
|
||
|
};
|
||
|
|
||
|
HEADER_GROUP_DESC = 17,
|
||
|
|
||
|
Description of counter groups ({...} in perf syntax)
|
||
|
|
||
|
struct {
|
||
|
uint32_t nr;
|
||
|
struct {
|
||
|
struct perf_header_string string;
|
||
|
uint32_t leader_idx;
|
||
|
uint32_t nr_members;
|
||
|
} [nr]; /* Variable length records */
|
||
|
};
|
||
|
|
||
|
HEADER_AUXTRACE = 18,
|
||
|
|
||
|
Define additional auxtrace areas in the perf.data. auxtrace is used to store
|
||
|
undecoded hardware tracing information, such as Intel Processor Trace data.
|
||
|
|
||
|
/**
|
||
|
* struct auxtrace_index_entry - indexes an AUX area tracing event within a
|
||
|
* perf.data file.
|
||
|
* @file_offset: offset within the perf.data file
|
||
|
* @sz: size of the event
|
||
|
*/
|
||
|
struct auxtrace_index_entry {
|
||
|
u64 file_offset;
|
||
|
u64 sz;
|
||
|
};
|
||
|
|
||
|
#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
|
||
|
|
||
|
/**
|
||
|
* struct auxtrace_index - index of AUX area tracing events within a perf.data
|
||
|
* file.
|
||
|
* @list: linking a number of arrays of entries
|
||
|
* @nr: number of entries
|
||
|
* @entries: array of entries
|
||
|
*/
|
||
|
struct auxtrace_index {
|
||
|
struct list_head list;
|
||
|
size_t nr;
|
||
|
struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
|
||
|
};
|
||
|
|
||
|
HEADER_STAT = 19,
|
||
|
|
||
|
This is merely a flag signifying that the data section contains data
|
||
|
recorded from perf stat record.
|
||
|
|
||
|
HEADER_CACHE = 20,
|
||
|
|
||
|
Description of the cache hierarchy. Based on the Linux sysfs format
|
||
|
in /sys/devices/system/cpu/cpu*/cache/
|
||
|
|
||
|
u32 version Currently always 1
|
||
|
u32 number_of_cache_levels
|
||
|
|
||
|
struct {
|
||
|
u32 level;
|
||
|
u32 line_size;
|
||
|
u32 sets;
|
||
|
u32 ways;
|
||
|
struct perf_header_string type;
|
||
|
struct perf_header_string size;
|
||
|
struct perf_header_string map;
|
||
|
}[number_of_cache_levels];
|
||
|
|
||
|
HEADER_SAMPLE_TIME = 21,
|
||
|
|
||
|
Two uint64_t for the time of first sample and the time of last sample.
|
||
|
|
||
|
other bits are reserved and should be ignored for now
|
||
|
HEADER_FEAT_BITS = 256,
|
||
|
|
||
|
Attributes
|
||
|
|
||
|
This is an array of perf_event_attrs, each attr_size bytes long, which defines
|
||
|
each event collected. See perf_event.h or the man page for a detailed
|
||
|
description.
|
||
|
|
||
|
Data
|
||
|
|
||
|
This section is the bulk of the file. It consists of a stream of perf_events
|
||
|
describing events. This matches the format generated by the kernel.
|
||
|
See perf_event.h or the manpage for a detailed description.
|
||
|
|
||
|
Some notes on parsing:
|
||
|
|
||
|
Ordering
|
||
|
|
||
|
The events are not necessarily in time stamp order, as they can be
|
||
|
collected in parallel on different CPUs. If the events should be
|
||
|
processed in time order they need to be sorted first. It is possible
|
||
|
to only do a partial sort using the FINISHED_ROUND event header (see
|
||
|
below). perf record guarantees that there is no reordering over a
|
||
|
FINISHED_ROUND.
|
||
|
|
||
|
ID vs IDENTIFIER
|
||
|
|
||
|
When the event stream contains multiple events each event is identified
|
||
|
by an ID. This can be either through the PERF_SAMPLE_ID or the
|
||
|
PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
|
||
|
at a fixed offset from the event header, which allows reliable
|
||
|
parsing of the header. Relying on ID may be ambiguous.
|
||
|
IDENTIFIER is only supported by newer Linux kernels.
|
||
|
|
||
|
Perf record specific events:
|
||
|
|
||
|
In addition to the kernel generated event types perf record adds its
|
||
|
own event types (in addition it also synthesizes some kernel events,
|
||
|
for example MMAP events)
|
||
|
|
||
|
PERF_RECORD_USER_TYPE_START = 64,
|
||
|
PERF_RECORD_HEADER_ATTR = 64,
|
||
|
|
||
|
struct attr_event {
|
||
|
struct perf_event_header header;
|
||
|
struct perf_event_attr attr;
|
||
|
uint64_t id[];
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
|
||
|
|
||
|
#define MAX_EVENT_NAME 64
|
||
|
|
||
|
struct perf_trace_event_type {
|
||
|
uint64_t event_id;
|
||
|
char name[MAX_EVENT_NAME];
|
||
|
};
|
||
|
|
||
|
struct event_type_event {
|
||
|
struct perf_event_header header;
|
||
|
struct perf_trace_event_type event_type;
|
||
|
};
|
||
|
|
||
|
|
||
|
PERF_RECORD_HEADER_TRACING_DATA = 66,
|
||
|
|
||
|
Describe me
|
||
|
|
||
|
struct tracing_data_event {
|
||
|
struct perf_event_header header;
|
||
|
uint32_t size;
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_HEADER_BUILD_ID = 67,
|
||
|
|
||
|
Defines an ELF build ID for a referenced executable.
|
||
|
|
||
|
struct build_id_event; /* See above */
|
||
|
|
||
|
PERF_RECORD_FINISHED_ROUND = 68,
|
||
|
|
||
|
No event reordering over this header. No payload.
|
||
|
|
||
|
PERF_RECORD_ID_INDEX = 69,
|
||
|
|
||
|
Map event ids to CPUs and TIDs.
|
||
|
|
||
|
struct id_index_entry {
|
||
|
uint64_t id;
|
||
|
uint64_t idx;
|
||
|
uint64_t cpu;
|
||
|
uint64_t tid;
|
||
|
};
|
||
|
|
||
|
struct id_index_event {
|
||
|
struct perf_event_header header;
|
||
|
uint64_t nr;
|
||
|
struct id_index_entry entries[nr];
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_AUXTRACE_INFO = 70,
|
||
|
|
||
|
Auxtrace type specific information. Describe me
|
||
|
|
||
|
struct auxtrace_info_event {
|
||
|
struct perf_event_header header;
|
||
|
uint32_t type;
|
||
|
uint32_t reserved__; /* For alignment */
|
||
|
uint64_t priv[];
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_AUXTRACE = 71,
|
||
|
|
||
|
Defines auxtrace data. Followed by the actual data. The contents of
|
||
|
the auxtrace data is dependent on the event and the CPU. For example
|
||
|
for Intel Processor Trace it contains Processor Trace data generated
|
||
|
by the CPU.
|
||
|
|
||
|
struct auxtrace_event {
|
||
|
struct perf_event_header header;
|
||
|
uint64_t size;
|
||
|
uint64_t offset;
|
||
|
uint64_t reference;
|
||
|
uint32_t idx;
|
||
|
uint32_t tid;
|
||
|
uint32_t cpu;
|
||
|
uint32_t reserved__; /* For alignment */
|
||
|
};
|
||
|
|
||
|
struct aux_event {
|
||
|
struct perf_event_header header;
|
||
|
uint64_t aux_offset;
|
||
|
uint64_t aux_size;
|
||
|
uint64_t flags;
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_AUXTRACE_ERROR = 72,
|
||
|
|
||
|
Describes an error in hardware tracing
|
||
|
|
||
|
enum auxtrace_error_type {
|
||
|
PERF_AUXTRACE_ERROR_ITRACE = 1,
|
||
|
PERF_AUXTRACE_ERROR_MAX
|
||
|
};
|
||
|
|
||
|
#define MAX_AUXTRACE_ERROR_MSG 64
|
||
|
|
||
|
struct auxtrace_error_event {
|
||
|
struct perf_event_header header;
|
||
|
uint32_t type;
|
||
|
uint32_t code;
|
||
|
uint32_t cpu;
|
||
|
uint32_t pid;
|
||
|
uint32_t tid;
|
||
|
uint32_t reserved__; /* For alignment */
|
||
|
uint64_t ip;
|
||
|
char msg[MAX_AUXTRACE_ERROR_MSG];
|
||
|
};
|
||
|
|
||
|
PERF_RECORD_HEADER_FEATURE = 80,
|
||
|
|
||
|
Describes a header feature. These are records used in pipe-mode that
|
||
|
contain information that otherwise would be in perf.data file's header.
|
||
|
|
||
|
Event types
|
||
|
|
||
|
Define the event attributes with their IDs.
|
||
|
|
||
|
An array bound by the perf_file_section size.
|
||
|
|
||
|
struct {
|
||
|
struct perf_event_attr attr; /* Size defined by header.attr_size */
|
||
|
struct perf_file_section ids;
|
||
|
}
|
||
|
|
||
|
ids points to an array of uint64_t defining the ids for event attr attr.
|
||
|
|
||
|
Pipe-mode data
|
||
|
|
||
|
Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
|
||
|
from the struct perf_header. The trimmed header is:
|
||
|
|
||
|
struct perf_pipe_file_header {
|
||
|
u64 magic;
|
||
|
u64 size;
|
||
|
};
|
||
|
|
||
|
The information about attrs, data, and event_types is instead in the
|
||
|
synthesized events PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
|
||
|
PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
|
||
|
that are generated by perf record in pipe-mode.
|
||
|
|
||
|
|
||
|
References:
|
||
|
|
||
|
include/uapi/linux/perf_event.h
|
||
|
|
||
|
This is the canonical description of the kernel generated perf_events
|
||
|
and the perf_event_attrs.
|
||
|
|
||
|
perf_events manpage
|
||
|
|
||
|
A manpage describing perf_event and perf_event_attr is here:
|
||
|
http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
|
||
|
This tends to be slightly behind the kernel include, but has better
|
||
|
descriptions. A (typically older) version of the man page may be
|
||
|
included with the standard Linux man pages, available with "man
|
||
|
perf_events"
|
||
|
|
||
|
pmu-tools
|
||
|
|
||
|
https://github.com/andikleen/pmu-tools/tree/master/parser
|
||
|
|
||
|
A definition of the perf.data format in python "construct" format is available
|
||
|
in pmu-tools parser. This allows reading perf.data from python and dumping it.
|
||
|
|
||
|
quipper
|
||
|
|
||
|
The quipper C++ parser is available at
|
||
|
http://github.com/google/perf_data_converter/tree/master/src/quipper
|
||
|
|