Merge tag 'afs-fixes-20180514' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull AFS fixes from David Howells:
 "Here's a set of patches that fix a number of bugs in the in-kernel AFS
  client, including:

   - Fix directory locking to not use individual page locks for
     directory reading/scanning but rather to use a semaphore on the
     afs_vnode struct as the directory contents must be read in a single
     blob and data from different reads must not be mixed as the entire
     contents may be shuffled about between reads.

   - Fix address list parsing to handle port specifiers correctly.

   - Only give up callback records on a server if we actually talked to
     that server (we might not be able to access a server).

   - Fix some callback handling bugs, including refcounting,
     whole-volume callbacks and when callbacks actually get broken in
     response to a CB.CallBack op.

   - Fix some server/address rotation bugs, including giving up if we
     can't probe a server; giving up if a server says it doesn't have a
     volume, but there are more servers to try.

   - Fix the decoding of fetched statuses to be OpenAFS compatible.

   - Fix the handling of server lookups in Cache Manager ops (such as
     CB.InitCallBackState3) to use a UUID if possible and to handle no
     server being found.

   - Fix a bug in server lookup where not all addresses are compared.

   - Fix the non-encryption of calls that prevents some servers from
     being accessed (this also requires an AF_RXRPC patch that has
     already gone in through the net tree).

  There's also a patch that adds tracepoints to log Cache Manager ops
  that don't find a matching server, either by UUID or by address"

* tag 'afs-fixes-20180514' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Fix the non-encryption of calls
  afs: Fix CB.CallBack handling
  afs: Fix whole-volume callback handling
  afs: Fix afs_find_server search loop
  afs: Fix the handling of an unfound server in CM operations
  afs: Add a tracepoint to record callbacks from unlisted servers
  afs: Fix the handling of CB.InitCallBackState3 to find the server by UUID
  afs: Fix VNOVOL handling in address rotation
  afs: Fix AFSFetchStatus decoder to provide OpenAFS compatibility
  afs: Fix server rotation's handling of fileserver probe failure
  afs: Fix refcounting in callback registration
  afs: Fix giving up callbacks on server destruction
  afs: Fix address list parsing
  afs: Fix directory page locking
This commit is contained in:
Linus Torvalds 2018-05-15 10:48:36 -07:00
commit 16c00db4bb
1015 changed files with 315359 additions and 0 deletions

36
.gitignore vendored Normal file
View file

@ -0,0 +1,36 @@
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
FEATURE-DUMP
perf
perf-read-vdso32
perf-read-vdsox32
perf-help
perf-record
perf-report
perf-stat
perf-top
perf*.1
perf*.xml
perf*.html
common-cmds.h
perf.data
perf.data.old
output.svg
perf-archive
perf-with-kcore
tags
TAGS
cscope*
config.mak
config.mak.autogen
*-bison.*
*-flex.*
*.pyc
*.pyo
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
trace/beauty/generated/
pmu-events/pmu-events.c
pmu-events/jevents

55
Build Normal file
View file

@ -0,0 +1,55 @@
perf-y += builtin-bench.o
perf-y += builtin-annotate.o
perf-y += builtin-config.o
perf-y += builtin-diff.o
perf-y += builtin-evlist.o
perf-y += builtin-ftrace.o
perf-y += builtin-help.o
perf-y += builtin-sched.o
perf-y += builtin-buildid-list.o
perf-y += builtin-buildid-cache.o
perf-y += builtin-kallsyms.o
perf-y += builtin-list.o
perf-y += builtin-record.o
perf-y += builtin-report.o
perf-y += builtin-stat.o
perf-y += builtin-timechart.o
perf-y += builtin-top.o
perf-y += builtin-script.o
perf-y += builtin-kmem.o
perf-y += builtin-lock.o
perf-y += builtin-kvm.o
perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
perf-y += builtin-version.o
perf-y += builtin-c2c.o
perf-$(CONFIG_TRACE) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
perf-y += bench/
perf-y += tests/
perf-y += perf.o
paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))"
paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))"
paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
CFLAGS_builtin-help.o += $(paths)
CFLAGS_builtin-timechart.o += $(paths)
CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \
-DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \
-DPREFIX="BUILD_STR($(prefix_SQ))"
CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
libperf-$(CONFIG_TRACE) += trace/beauty/
gtk-y += ui/gtk/

30
CREDITS Normal file
View file

@ -0,0 +1,30 @@
Most of the infrastructure that 'perf' uses here has been reused
from the Git project, as of version:
66996ec: Sync with 1.6.2.4
Here is an (incomplete!) list of main contributors to those files
in util/* and elsewhere:
Alex Riesen
Christian Couder
Dmitry Potapov
Jeff King
Johannes Schindelin
Johannes Sixt
Junio C Hamano
Linus Torvalds
Matthias Kestenholz
Michal Ostrowski
Miklos Vajna
Petr Baudis
Pierre Habouzit
René Scharfe
Samuel Tardieu
Shawn O. Pearce
Steffen Prohaska
Steve Haslam
Thanks guys!
The full history of the files can be found in the upstream Git commits.

49
Documentation/Build.txt Normal file
View file

@ -0,0 +1,49 @@
1) perf build
=============
The perf build process consists of several separated building blocks,
which are linked together to form the perf binary:
- libperf library (static)
- perf builtin commands
- traceevent library (static)
- GTK ui library
Several makefiles govern the perf build:
- Makefile
top level Makefile working as a wrapper that calls the main
Makefile.perf with a -j option to do parallel builds.
- Makefile.perf
main makefile that triggers build of all perf objects including
installation and documentation processing.
- tools/build/Makefile.build
main makefile of the build framework
- tools/build/Build.include
build framework generic definitions
- Build makefiles
makefiles that defines build objects
Please refer to tools/build/Documentation/Build.txt for more
information about build framework.
2) perf build
=============
The Makefile.perf triggers the build framework for build objects:
perf, libperf, gtk
resulting in following objects:
$ ls *-in.o
gtk-in.o libperf-in.o perf-in.o
Those objects are then used in final linking:
libperf-gtk.so <- gtk-in.o libperf-in.o
perf <- perf-in.o libperf-in.o
NOTE this description is omitting other libraries involved, only
focusing on build framework outcomes

343
Documentation/Makefile Normal file
View file

@ -0,0 +1,343 @@
include ../../scripts/Makefile.include
include ../../scripts/utilities.mak
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
$(wildcard perf-*.txt)) \
perf.txt
MAN5_TXT=
MAN7_TXT=
MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
ARTICLES =
# with their own formatting rules.
SP_ARTICLES =
API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
SP_ARTICLES += $(API_DOCS)
SP_ARTICLES += technical/api-index
_DOC_HTML = $(_MAN_HTML)
_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
# Make the path relative to DESTDIR, not prefix
ifndef DESTDIR
prefix?=$(HOME)
endif
bindir?=$(prefix)/bin
htmldir?=$(prefix)/share/doc/perf-doc
pdfdir?=$(prefix)/share/doc/perf-doc
mandir?=$(prefix)/share/man
man1dir=$(mandir)/man1
man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
ASCIIDOC_EXTRA = --unsafe
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
INSTALL?=install
RM ?= rm -f
DOC_REF = origin/man
HTML_REF = origin/html
infodir?=$(prefix)/share/info
MAKEINFO=makeinfo
INSTALL_INFO=install-info
DOCBOOK2X_TEXI=docbook2x-texi
DBLATEX=dblatex
XMLTO=xmlto
ifndef PERL_PATH
PERL_PATH = /usr/bin/perl
endif
-include ../config.mak.autogen
-include ../config.mak
_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
ifeq ($(_tmp_tool_path),)
missing_tools = $(ASCIIDOC)
endif
_tmp_tool_path := $(call get-executable,$(XMLTO))
ifeq ($(_tmp_tool_path),)
missing_tools += $(XMLTO)
endif
#
# For asciidoc ...
# -7.1.2, no extra settings are needed.
# 8.0-, set ASCIIDOC8.
#
#
# For docbook-xsl ...
# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
# 1.69.0, no extra settings are needed?
# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
# 1.71.1, no extra settings are needed?
# 1.72.0, set DOCBOOK_XSL_172.
# 1.73.0-, set ASCIIDOC_NO_ROFF
#
#
# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
# of 'the ".ft C" problem' in your generated manpages, and you
# instead ended up with weird characters around callouts, try
# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
#
ifdef ASCIIDOC8
ASCIIDOC_EXTRA += -a asciidoc7compatible
endif
ifdef DOCBOOK_XSL_172
ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
MANPAGE_XSL = manpage-1.72.xsl
else
ifdef ASCIIDOC_NO_ROFF
# docbook-xsl after 1.72 needs the regular XSL, but will not
# pass-thru raw roff codes from asciidoc.conf, so turn them off.
ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
endif
endif
ifdef MAN_BOLD_LITERAL
XMLTO_EXTRA += -m manpage-bold-literal.xsl
endif
ifdef DOCBOOK_SUPPRESS_SP
XMLTO_EXTRA += -m manpage-suppress-sp.xsl
endif
SHELL_PATH ?= $(SHELL)
# Shell quote;
SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
#
# Please note that there is a minor bug in asciidoc.
# The version after 6.0.3 _will_ include the patch found here:
# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
#
# Until that version is released you may have to apply the patch
# yourself - yes, all 6 characters of it!
#
QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
QUIET_SUBDIR1 =
ifneq ($(findstring $(MAKEFLAGS),w),w)
PRINT_DIR = --no-print-directory
else # "make -w"
NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifneq ($(V),1)
QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
QUIET_XMLTO = @echo ' XMLTO '$@;
QUIET_DB2TEXI = @echo ' DB2TEXI '$@;
QUIET_MAKEINFO = @echo ' MAKEINFO '$@;
QUIET_DBLATEX = @echo ' DBLATEX '$@;
QUIET_XSLTPROC = @echo ' XSLTPROC '$@;
QUIET_GEN = @echo ' GEN '$@;
QUIET_STDERR = 2> /dev/null
QUIET_SUBDIR0 = +@subdir=
QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
echo ' SUBDIR ' $$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
export V
endif
endif
all: html man
html: $(DOC_HTML)
$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
man: man1 man5 man7
man1: $(DOC_MAN1)
man5: $(DOC_MAN5)
man7: $(DOC_MAN7)
info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
pdf: $(OUTPUT)user-manual.pdf
install: install-man
check-man-tools:
ifdef missing_tools
$(error "You need to install $(missing_tools) for man pages")
endif
do-install-man: man
$(call QUIET_INSTALL, Documentation-man) \
$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
install-man: check-man-tools man do-install-man
ifdef missing_tools
DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
else
DO_INSTALL_MAN = do-install-man
endif
try-install-man: $(DO_INSTALL_MAN)
install-info: info
$(call QUIET_INSTALL, Documentation-info) \
$(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
if test -r $(DESTDIR)$(infodir)/dir; then \
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
else \
echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
fi
install-pdf: pdf
$(call QUIET_INSTALL, Documentation-pdf) \
$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
$(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
#
# Determine "include::" file references in asciidoc files.
#
$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
$(QUIET_GEN)$(RM) $@+ $@ && \
$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
mv $@+ $@
-include $(OUPTUT)doc.dep
_cmds_txt = cmds-ancillaryinterrogators.txt \
cmds-ancillarymanipulators.txt \
cmds-mainporcelain.txt \
cmds-plumbinginterrogators.txt \
cmds-plumbingmanipulators.txt \
cmds-synchingrepositories.txt \
cmds-synchelpers.txt \
cmds-purehelpers.txt \
cmds-foreignscminterface.txt
cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
$(cmds_txt): $(OUTPUT)cmd-list.made
$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
$(QUIET_GEN)$(RM) $@ && \
$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
date >$@
CLEAN_FILES = \
$(MAN_XML) $(addsuffix +,$(MAN_XML)) \
$(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \
$(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \
$(OUTPUT)perf.info $(OUTPUT)perfman.info \
$(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \
$(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \
$(cmds_txt) $(OUTPUT)*.made
clean:
$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
$(MAN_HTML): $(OUTPUT)%.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
XSLT = docbook.xsl
XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
$(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
rm $@++ && \
mv $@+ $@
$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
$(QUIET_DBLATEX)$(RM) $@+ $@ && \
$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
mv $@+ $@
$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
--to-stdout $(xml) &&) true) > $@++ && \
$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
rm $@++ && \
mv $@+ $@
$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
mv $@+ $@
howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
$(QUIET_GEN)$(RM) $@+ $@ && \
'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
mv $@+ $@
$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
WEBDOC_DEST = /pub/software/tools/perf/docs
$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
mv $@+ $@
# UNIMPLEMENTED
#install-webdoc : html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
# quick-install: quick-install-man
# quick-install-man:
# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
#quick-install-html:
# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)

78
Documentation/android.txt Normal file
View file

@ -0,0 +1,78 @@
How to compile perf for Android
=========================================
I. Set the Android NDK environment
------------------------------------------------
(a). Use the Android NDK
------------------------------------------------
1. You need to download and install the Android Native Development Kit (NDK).
Set the NDK variable to point to the path where you installed the NDK:
export NDK=/path/to/android-ndk
2. Set cross-compiling environment variables for NDK toolchain and sysroot.
For arm:
export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-
export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm
For x86:
export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android-
export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86
This method is only tested for Android NDK versions Revision 11b and later.
perf uses some bionic enhancements that are not included in prior NDK versions.
You can use method (b) described below instead.
(b). Use the Android source tree
-----------------------------------------------
1. Download the master branch of the Android source tree.
Set the environment for the target you want using:
source build/envsetup.sh
lunch
2. Build your own NDK sysroot to contain latest bionic changes and set the
NDK sysroot environment variable.
cd ${ANDROID_BUILD_TOP}/ndk
For arm:
./build/tools/build-ndk-sysroot.sh --abi=arm
export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
For x86:
./build/tools/build-ndk-sysroot.sh --abi=x86
export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
3. Set the NDK toolchain environment variable.
For arm:
export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
For x86:
export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
II. Compile perf for Android
------------------------------------------------
You need to run make with the NDK toolchain and sysroot defined above:
For arm:
make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
For x86:
make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
III. Install perf
-----------------------------------------------
You need to connect to your Android device/emulator using adb.
Install perf using:
adb push perf /data/perf
If you also want to use perf-archive you need busybox tools for Android.
For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
chmod +x /tmp/perf-archive
adb push /tmp/perf-archive /data/perf-archive
IV. Environment settings for running perf
------------------------------------------------
Some perf features need environment variables to run properly.
You need to set these before running perf on the target:
adb shell
# PERF_PAGER=cat
IV. Run perf
------------------------------------------------
Run perf on your device/emulator to which you previously connected using adb:
# ./data/perf

View file

@ -0,0 +1,91 @@
## linkperf: macro
#
# Usage: linkperf:command[manpage-section]
#
# Note, {0} is the manpage section, while {target} is the command.
#
# Show PERF link as: <command>(<section>); if section is defined, else just show
# the command.
[macros]
(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
[attributes]
asterisk=&#42;
plus=&#43;
caret=&#94;
startsb=&#91;
endsb=&#93;
tilde=&#126;
ifdef::backend-docbook[]
[linkperf-inlinemacro]
{0%{target}}
{0#<citerefentry>}
{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
{0#</citerefentry>}
endif::backend-docbook[]
ifdef::backend-docbook[]
ifndef::perf-asciidoc-no-roff[]
# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
# v1.72 breaks with this because it replaces dots not in roff requests.
[listingblock]
<example><title>{title}</title>
<literallayout>
ifdef::doctype-manpage[]
&#10;.ft C&#10;
endif::doctype-manpage[]
|
ifdef::doctype-manpage[]
&#10;.ft&#10;
endif::doctype-manpage[]
</literallayout>
{title#}</example>
endif::perf-asciidoc-no-roff[]
ifdef::perf-asciidoc-no-roff[]
ifdef::doctype-manpage[]
# The following two small workarounds insert a simple paragraph after screen
[listingblock]
<example><title>{title}</title>
<literallayout>
|
</literallayout><simpara></simpara>
{title#}</example>
[verseblock]
<formalpara{id? id="{id}"}><title>{title}</title><para>
{title%}<literallayout{id? id="{id}"}>
{title#}<literallayout>
|
</literallayout>
{title#}</para></formalpara>
{title%}<simpara></simpara>
endif::doctype-manpage[]
endif::perf-asciidoc-no-roff[]
endif::backend-docbook[]
ifdef::doctype-manpage[]
ifdef::backend-docbook[]
[header]
template::[header-declarations]
<refentry>
<refmeta>
<refentrytitle>{mantitle}</refentrytitle>
<manvolnum>{manvolnum}</manvolnum>
<refmiscinfo class="source">perf</refmiscinfo>
<refmiscinfo class="version">{perf_version}</refmiscinfo>
<refmiscinfo class="manual">perf Manual</refmiscinfo>
</refmeta>
<refnamediv>
<refname>{manname}</refname>
<refpurpose>{manpurpose}</refpurpose>
</refnamediv>
endif::backend-docbook[]
endif::doctype-manpage[]
ifdef::backend-xhtml11[]
[linkperf-inlinemacro]
<a href="{target}.html">{target}{0?({0})}</a>
endif::backend-xhtml11[]

View file

@ -0,0 +1,108 @@
Overhead calculation
--------------------
The overhead can be shown in two columns as 'Children' and 'Self' when
perf collects callchains. The 'self' overhead is simply calculated by
adding all period values of the entry - usually a function (symbol).
This is the value that perf shows traditionally and sum of all the
'self' overhead values should be 100%.
The 'children' overhead is calculated by adding all period values of
the child functions so that it can show the total overhead of the
higher level functions even if they don't directly execute much.
'Children' here means functions that are called from another (parent)
function.
It might be confusing that the sum of all the 'children' overhead
values exceeds 100% since each of them is already an accumulation of
'self' overhead of its child functions. But with this enabled, users
can find which function has the most overhead even if samples are
spread over the children.
Consider the following example; there are three functions like below.
-----------------------
void foo(void) {
/* do something */
}
void bar(void) {
/* do something */
foo();
}
int main(void) {
bar()
return 0;
}
-----------------------
In this case 'foo' is a child of 'bar', and 'bar' is an immediate
child of 'main' so 'foo' also is a child of 'main'. In other words,
'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
Suppose all samples are recorded in 'foo' and 'bar' only. When it's
recorded with callchains the output will show something like below
in the usual (self-overhead-only) output of perf report:
----------------------------------
Overhead Symbol
........ .....................
60.00% foo
|
--- foo
bar
main
__libc_start_main
40.00% bar
|
--- bar
main
__libc_start_main
----------------------------------
When the --children option is enabled, the 'self' overhead values of
child functions (i.e. 'foo' and 'bar') are added to the parents to
calculate the 'children' overhead. In this case the report could be
displayed as:
-------------------------------------------
Children Self Symbol
........ ........ ....................
100.00% 0.00% __libc_start_main
|
--- __libc_start_main
100.00% 0.00% main
|
--- main
__libc_start_main
100.00% 40.00% bar
|
--- bar
main
__libc_start_main
60.00% 60.00% foo
|
--- foo
bar
main
__libc_start_main
-------------------------------------------
In the above output, the 'self' overhead of 'foo' (60%) was add to the
'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
Likewise, the 'self' overhead of 'bar' (40%) was added to the
'children' overhead of 'main' and '\_\_libc_start_main'.
So '\_\_libc_start_main' and 'main' are shown first since they have
same (100%) 'children' overhead (even though they have zero 'self'
overhead) and they are the parents of 'foo' and 'bar'.
Since v3.16 the 'children' overhead is shown by default and the output
is sorted by its values. The 'children' overhead is disabled by
specifying --no-children option on the command line or by adding
'report.children = false' or 'top.children = false' in the perf config
file.

225
Documentation/examples.txt Normal file
View file

@ -0,0 +1,225 @@
------------------------------
****** perf by examples ******
------------------------------
[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
First, discovery/enumeration of available counters can be done via
'perf list':
titan:~> perf list
[...]
kmem:kmalloc [Tracepoint event]
kmem:kmem_cache_alloc [Tracepoint event]
kmem:kmalloc_node [Tracepoint event]
kmem:kmem_cache_alloc_node [Tracepoint event]
kmem:kfree [Tracepoint event]
kmem:kmem_cache_free [Tracepoint event]
kmem:mm_page_free [Tracepoint event]
kmem:mm_page_free_batched [Tracepoint event]
kmem:mm_page_alloc [Tracepoint event]
kmem:mm_page_alloc_zone_locked [Tracepoint event]
kmem:mm_page_pcpu_drain [Tracepoint event]
kmem:mm_page_alloc_extfrag [Tracepoint event]
Then any (or all) of the above event sources can be activated and
measured. For example the page alloc/free properties of a 'hackbench
run' are:
titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
-e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
Time: 0.575
Performance counter stats for './hackbench 10':
13857 kmem:mm_page_pcpu_drain
27576 kmem:mm_page_alloc
6025 kmem:mm_page_free_batched
20934 kmem:mm_page_free
0.613972165 seconds time elapsed
You can observe the statistical properties as well, by using the
'repeat the workload N times' feature of perf stat:
titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
kmem:mm_page_free ./hackbench 10
Time: 0.627
Time: 0.644
Time: 0.564
Time: 0.559
Time: 0.626
Performance counter stats for './hackbench 10' (5 runs):
12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
25035 kmem:mm_page_alloc ( +- 3.783% )
6104 kmem:mm_page_free_batched ( +- 0.934% )
18376 kmem:mm_page_free ( +- 4.941% )
0.643954516 seconds time elapsed ( +- 2.363% )
Furthermore, these tracepoints can be used to sample the workload as
well. For example the page allocations done by a 'git gc' can be
captured the following way:
titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Writing objects: 100% (1148/1148), done.
Total 1148 (delta 690), reused 1148 (delta 690)
[ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
To check which functions generated page allocations:
titan:~/git> perf report
# Samples: 10646
#
# Overhead Command Shared Object
# ........ ............... ..........................
#
23.57% git-repack /lib64/libc-2.5.so
21.81% git /lib64/libc-2.5.so
14.59% git ./git
11.79% git-repack ./git
7.12% git /lib64/ld-2.5.so
3.16% git-repack /lib64/libpthread-2.5.so
2.09% git-repack /bin/bash
1.97% rm /lib64/libc-2.5.so
1.39% mv /lib64/ld-2.5.so
1.37% mv /lib64/libc-2.5.so
1.12% git-repack /lib64/ld-2.5.so
0.95% rm /lib64/ld-2.5.so
0.90% git-update-serv /lib64/libc-2.5.so
0.73% git-update-serv /lib64/ld-2.5.so
0.68% perf /lib64/libpthread-2.5.so
0.64% git-repack /usr/lib64/libz.so.1.2.3
Or to see it on a more finegrained level:
titan:~/git> perf report --sort comm,dso,symbol
# Samples: 10646
#
# Overhead Command Shared Object Symbol
# ........ ............... .......................... ......
#
9.35% git-repack ./git [.] insert_obj_hash
9.12% git ./git [.] insert_obj_hash
7.31% git /lib64/libc-2.5.so [.] memcpy
6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
6.24% git-repack /lib64/libc-2.5.so [.] memcpy
5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
5.47% git /lib64/libc-2.5.so [.] _int_malloc
2.99% git /lib64/libc-2.5.so [.] memset
Furthermore, call-graph sampling can be done too, of page
allocations - to see precisely what kind of page allocations there
are:
titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Writing objects: 100% (1148/1148), done.
Total 1148 (delta 690), reused 1148 (delta 690)
[ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
titan:~/git> perf report -g
# Samples: 10686
#
# Overhead Command Shared Object
# ........ ............... ..........................
#
23.25% git-repack /lib64/libc-2.5.so
|
|--50.00%-- _int_free
|
|--37.50%-- __GI___fork
| make_child
|
|--12.50%-- ptmalloc_unlock_all2
| make_child
|
--6.25%-- __GI_strcpy
21.61% git /lib64/libc-2.5.so
|
|--30.00%-- __GI_read
| |
| --83.33%-- git_config_from_file
| git_config
| |
[...]
Or you can observe the whole system's page allocations for 10
seconds:
titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
kmem:mm_page_free sleep 10
Performance counter stats for 'sleep 10':
171585 kmem:mm_page_pcpu_drain
322114 kmem:mm_page_alloc
73623 kmem:mm_page_free_batched
254115 kmem:mm_page_free
10.000591410 seconds time elapsed
Or observe how fluctuating the page allocations are, via statistical
analysis done over ten 1-second intervals:
titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
kmem:mm_page_free sleep 1
Performance counter stats for 'sleep 1' (10 runs):
17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
34394 kmem:mm_page_alloc ( +- 4.617% )
7509 kmem:mm_page_free_batched ( +- 4.820% )
25653 kmem:mm_page_free ( +- 3.672% )
1.058135029 seconds time elapsed ( +- 3.089% )
Or you can annotate the recorded 'git gc' run on a per symbol basis
and check which instructions/source-code generated page allocations:
titan:~/git> perf annotate __GI___fork
------------------------------------------------
Percent | Source code & Disassembly of libc-2.5.so
------------------------------------------------
:
:
: Disassembly of section .plt:
: Disassembly of section .text:
:
: 00000031a2e95560 <__fork>:
[...]
0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
0.00 : 31a2e95607: 0f 05 syscall
83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
0.00 : 31a2e95615: 85 c0 test %eax,%eax
( this shows that 83.42% of __GI___fork's page allocations come from
the 0x38 system call it performs. )
etc. etc. - a lot more is possible. I could list a dozen of
other different usecases straight away - neither of which is
possible via /proc/vmstat.
/proc/vmstat is not in the same league really, in terms of
expressive power of system analysis and performance
analysis.
All that the above results needed were those new tracepoints
in include/tracing/events/kmem.h.
Ingo

View file

@ -0,0 +1,86 @@
Intel Branch Trace Store
========================
Overview
========
Intel BTS could be regarded as a predecessor to Intel PT and has some
similarities because it can also identify every branch a program takes. A
notable difference is that Intel BTS has no timing information and as a
consequence the present implementation is limited to per-thread recording.
While decoding Intel BTS does not require walking the object code, the object
code is still needed to pair up calls and returns correctly, consequently much
of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
documentation and consider that the PMU 'intel_bts' can usually be used in
place of 'intel_pt' in the examples provided, with the proviso that per-thread
recording must also be stipulated i.e. the --per-thread option for
'perf record'.
perf record
===========
new event
---------
The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
option is:
-e intel_bts//
Currently Intel BTS is limited to per-thread tracing so the --per-thread option
is also needed.
snapshot option
---------------
The snapshot option is the same as Intel PT (refer Intel PT documentation).
auxtrace mmap size option
-----------------------
The mmap size option is the same as Intel PT (refer Intel PT documentation).
perf script
===========
By default, perf script will decode trace data found in the perf.data file.
This can be further controlled by option --itrace. The --itrace option is
the same as Intel PT (refer Intel PT documentation) except that neither
"instructions" events nor "transactions" events (and consequently call
chains) are supported.
To disable trace decoding entirely, use the option --no-itrace.
dump option
-----------
perf script has an option (-D) to "dump" the events i.e. display the binary
data.
When -D is used, Intel BTS packets are displayed.
To disable the display of Intel BTS packets, combine the -D option with
--no-itrace.
perf report
===========
By default, perf report will decode trace data found in the perf.data file.
This can be further controlled by new option --itrace exactly the same as
perf script.
perf inject
===========
perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new

891
Documentation/intel-pt.txt Normal file
View file

@ -0,0 +1,891 @@
Intel Processor Trace
=====================
Overview
========
Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
collects information about software execution such as control flow, execution
modes and timings and formats it into highly compressed binary packets.
Technical details are documented in the Intel 64 and IA-32 Architectures
Software Developer Manuals, Chapter 36 Intel Processor Trace.
Intel PT is first supported in Intel Core M and 5th generation Intel Core
processors that are based on the Intel micro-architecture code name Broadwell.
Trace data is collected by 'perf record' and stored within the perf.data file.
See below for options to 'perf record'.
Trace data must be 'decoded' which involves walking the object code and matching
the trace data packets. For example a TNT packet only tells whether a
conditional branch was taken or not taken, so to make use of that packet the
decoder must know precisely which instruction was being executed.
Decoding is done on-the-fly. The decoder outputs samples in the same format as
samples output by perf hardware events, for example as though the "instructions"
or "branches" events had been recorded. Presently 3 tools support this:
'perf script', 'perf report' and 'perf inject'. See below for more information
on using those tools.
The main distinguishing feature of Intel PT is that the decoder can determine
the exact flow of software execution. Intel PT can be used to understand why
and how did software get to a certain point, or behave a certain way. The
software does not have to be recompiled, so Intel PT works with debug or release
builds, however the executed images are needed - which makes use in JIT-compiled
environments, or with self-modified code, a challenge. Also symbols need to be
provided to make sense of addresses.
A limitation of Intel PT is that it produces huge amounts of trace data
(hundreds of megabytes per second per core) which takes a long time to decode,
for example two or three orders of magnitude longer than it took to collect.
Another limitation is the performance impact of tracing, something that will
vary depending on the use-case and architecture.
Quickstart
==========
It is important to start small. That is because it is easy to capture vastly
more data than can possibly be processed.
The simplest thing to do with Intel PT is userspace profiling of small programs.
Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
perf record -e intel_pt//u ls
And profiled with 'perf report' e.g.
perf report
To also trace kernel space presents a problem, namely kernel self-modifying
code. A fairly good kernel image is available in /proc/kcore but to get an
accurate image a copy of /proc/kcore needs to be made under the same conditions
as the data capture. A script perf-with-kcore can do that, but beware that the
script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
locally from the source tree you can do:
~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
which will create a directory named 'pt_ls' and put the perf.data file and
copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
'perf report' becomes:
~/libexec/perf-core/perf-with-kcore report pt_ls
Because samples are synthesized after-the-fact, the sampling period can be
selected for reporting. e.g. sample every microsecond
~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
See the sections below for more information about the --itrace option.
Beware the smaller the period, the more samples that are produced, and the
longer it takes to process them.
Also note that the coarseness of Intel PT timing information will start to
distort the statistical value of the sampling as the sampling period becomes
smaller.
To represent software control flow, "branches" samples are produced. By default
a branch sample is synthesized for every single branch. To get an idea what
data is available you can use the 'perf script' tool with no parameters, which
will list all the samples.
perf record -e intel_pt//u ls
perf script
An interesting field that is not printed by default is 'flags' which can be
displayed as follows:
perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
in transaction, respectively.
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
and to script call-graph-from-sql.py for an example of using the database.
There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE.
As mentioned above, it is easy to capture too much data. One way to limit the
data captured is to use 'snapshot' mode which is explained further below.
Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
Another problem that will be experienced is decoder errors. They can be caused
by inability to access the executed image, self-modified or JIT-ed code, or the
inability to match side-band information (such as context switches and mmaps)
which results in the decoder not knowing what code was executed.
There is also the problem of perf not being able to copy the data fast enough,
resulting in data lost because the buffer was full. See 'Buffer handling' below
for more details.
perf record
===========
new event
---------
The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
selected by providing the PMU name followed by the "config" separated by slashes.
An enhancement has been made to allow default "config" e.g. the option
-e intel_pt//
will use a default config value. Currently that is the same as
-e intel_pt/tsc,noretcomp=0/
which is the same as
-e intel_pt/tsc=1,noretcomp=0/
Note there are now new config terms - see section 'config terms' further below.
The config terms are listed in /sys/devices/intel_pt/format. They are bit
fields within the config member of the struct perf_event_attr which is
passed to the kernel by the perf_event_open system call. They correspond to bit
fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
$ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
/sys/bus/event_source/devices/intel_pt/format/cyc:config:1
/sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
/sys/bus/event_source/devices/intel_pt/format/mtc:config:9
/sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
/sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
/sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
/sys/bus/event_source/devices/intel_pt/format/tsc:config:10
Note that the default config must be overridden for each term i.e.
-e intel_pt/noretcomp=0/
is the same as:
-e intel_pt/tsc=1,noretcomp=0/
So, to disable TSC packets use:
-e intel_pt/tsc=0/
It is also possible to specify the config value explicitly:
-e intel_pt/config=0x400/
Note that, as with all events, the event is suffixed with event modifiers:
u userspace
k kernel
h hypervisor
G guest
H host
p precise ip
'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
meaningful for Intel PT.
perf_event_attr is displayed if the -vv option is used e.g.
------------------------------------------------------------
perf_event_attr:
type 6
size 112
config 0x400
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
enable_on_exec 1
sample_id_all 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
config terms
------------
The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
Some of the features are reflect in new config terms. All the config terms are
described below.
tsc Always supported. Produces TSC timestamp packets to provide
timing information. In some cases it is possible to decode
without timing information, for example a per-thread context
that does not overlap executable memory maps.
The default config selects tsc (i.e. tsc=1).
noretcomp Always supported. Disables "return compression" so a TIP packet
is produced when a function returns. Causes more packets to be
produced but might make decoding more reliable.
The default config does not select noretcomp (i.e. noretcomp=0).
psb_period Allows the frequency of PSB packets to be specified.
The PSB packet is a synchronization packet that provides a
starting point for decoding or recovery from errors.
Support for psb_period is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
which contains "1" if the feature is supported and "0"
otherwise.
Valid values are given by:
/sys/bus/event_source/devices/intel_pt/caps/psb_periods
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The psb_period value is converted to the approximate number of
trace bytes between PSB packets as:
2 ^ (value + 11)
e.g. value 3 means 16KiB bytes between PSBs
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/psb_period=15/u uname
Invalid psb_period for intel_pt. Valid values are: 0-5
If MTC packets are selected, the default config selects a value
of 3 (i.e. psb_period=3) or the nearest lower value that is
supported (0 is always supported). Otherwise the default is 0.
If decoding is expected to be reliable and the buffer is large
then a large PSB period can be used.
Because a TSC packet is produced with PSB, the PSB period can
also affect the granularity to timing information in the absence
of MTC or CYC.
mtc Produces MTC timing packets.
MTC packets provide finer grain timestamp information than TSC
packets. MTC packets record time using the hardware crystal
clock (CTC) which is related to TSC packets using a TMA packet.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/mtc
which contains "1" if the feature is supported and
"0" otherwise.
The frequency of MTC packets can also be specified - see
mtc_period below.
mtc_period Specifies how frequently MTC packets are produced - see mtc
above for how to determine if MTC packets are supported.
Valid values are given by:
/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The mtc_period value is converted to the MTC frequency as:
CTC-frequency / (2 ^ value)
e.g. value 3 means one eighth of CTC-frequency
Where CTC is the hardware crystal clock, the frequency of which
can be related to TSC via values provided in cpuid leaf 0x15.
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/mtc_period=15/u uname
Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
The default value is 3 or the nearest lower value
that is supported (0 is always supported).
cyc Produces CYC timing packets.
CYC packets provide even finer grain timestamp information than
MTC and TSC packets. A CYC packet contains the number of CPU
cycles since the last CYC packet. Unlike MTC and TSC packets,
CYC packets are only sent when another packet is also sent.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
which contains "1" if the feature is supported and
"0" otherwise.
The number of CYC packets produced can be reduced by specifying
a threshold - see cyc_thresh below.
cyc_thresh Specifies how frequently CYC packets are produced - see cyc
above for how to determine if CYC packets are supported.
Valid cyc_thresh values are given by:
/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The cyc_thresh value represents the minimum number of CPU cycles
that must have passed before a CYC packet can be sent. The
number of CPU cycles is:
2 ^ (value - 1)
e.g. value 4 means 8 CPU cycles must pass before a CYC packet
can be sent. Note a CYC packet is still only sent when another
packet is sent, not at, e.g. every 8 CPU cycles.
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
Invalid cyc_thresh for intel_pt. Valid values are: 0-12
CYC packets are not requested by default.
pt Specifies pass-through which enables the 'branch' config term.
The default config selects 'pt' if it is available, so a user will
never need to specify this term.
branch Enable branch tracing. Branch tracing is enabled by default so to
disable branch tracing use 'branch=0'.
The default config selects 'branch' if it is available.
ptw Enable PTWRITE packets which are produced when a ptwrite instruction
is executed.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
which contains "1" if the feature is supported and
"0" otherwise.
fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
provides the address of the ptwrite instruction. In the absence of
fup_on_ptw, the decoder will use the address of the previous branch
if branch tracing is enabled, otherwise the address will be zero.
Note that fup_on_ptw will work even when branch tracing is disabled.
pwr_evt Enable power events. The power events provide information about
changes to the CPU C-state.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
which contains "1" if the feature is supported and
"0" otherwise.
new snapshot option
-------------------
The difference between full trace and snapshot from the kernel's perspective is
that in full trace we don't overwrite trace data that the user hasn't collected
yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
the trace run and overwrite older data in the buffer so that whenever something
interesting happens, we can stop it and grab a snapshot of what was going on
around that interesting moment.
To select snapshot mode a new option has been added:
-S
Optionally it can be followed by the snapshot size e.g.
-S0x100000
The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
nor snapshot size is specified, then the default is 4MiB for privileged users
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
If an unprivileged user does not specify mmap pages, the mmap pages will be
reduced as described in the 'new auxtrace mmap size option' section below.
The snapshot size is displayed if the option -vv is used e.g.
Intel PT snapshot size: %zu
new auxtrace mmap size option
---------------------------
Intel PT buffer size is specified by an addition to the -m option e.g.
-m,16
selects a buffer size of 16 pages i.e. 64KiB.
Note that the existing functionality of -m is unchanged. The auxtrace mmap size
is specified by the optional addition of a comma and the value.
The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
If an unprivileged user does not specify mmap pages, the mmap pages will be
reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
user is likely to get an error as they exceed their mlock limit (Max locked
memory as shown in /proc/self/limits). Note that perf does not count the first
512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
their mlock limit (which defaults to 64KiB but is not multiplied by the number
of cpus).
In full-trace mode, powers of two are allowed for buffer size, with a minimum
size of 2 pages. In snapshot mode, it is the same but the minimum size is
1 page.
The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
mmap length 528384
auxtrace mmap length 4198400
Intel PT modes of operation
---------------------------
Intel PT can be used in 2 modes:
full-trace mode
snapshot mode
Full-trace mode traces continuously e.g.
perf record -e intel_pt//u uname
Snapshot mode captures the available data when a signal is sent e.g.
perf record -v -e intel_pt//u -S ./loopy 1000000000 &
[1] 11435
kill -USR2 11435
Recording AUX area tracing snapshot
Note that the signal sent is SIGUSR2.
Note that "Recording AUX area tracing snapshot" is displayed because the -v
option is used.
The 2 modes cannot be used together.
Buffer handling
---------------
There may be buffer limitations (i.e. single ToPa entry) which means that actual
buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
provide other sizes, and in particular an arbitrarily large size, multiple
buffers are logically concatenated. However an interrupt must be used to switch
between buffers. That has two potential problems:
a) the interrupt may not be handled in time so that the current buffer
becomes full and some trace data is lost.
b) the interrupts may slow the system and affect the performance
results.
If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
which the tools report as an error.
In full-trace mode, the driver waits for data to be copied out before allowing
the (logical) buffer to wrap-around. If data is not copied out quickly enough,
again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
wait, the intel_pt event gets disabled. Because it is difficult to know when
that happens, perf tools always re-enable the intel_pt event after copying out
data.
Intel PT and build ids
----------------------
By default "perf record" post-processes the event stream to find all build ids
for executables for all addresses sampled. Deliberately, Intel PT is not
decoded for that purpose (it would take too long). Instead the build ids for
all executables encountered (due to mmap, comm or task events) are included
in the perf.data file.
To see buildids included in the perf.data file use the command:
perf buildid-list
If the perf.data file contains Intel PT data, that is the same as:
perf buildid-list --with-hits
Snapshot mode and event disabling
---------------------------------
In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
collection of side-band information. In order to prevent that, a dummy
software event has been introduced that permits tracking events (like mmaps) to
continue to be recorded while intel_pt is disabled. That is important to ensure
there is complete side-band information to allow the decoding of subsequent
snapshots.
A test has been created for that. To find the test:
perf test list
...
23: Test using a dummy software event to keep tracking
To run the test:
perf test 23
23: Test using a dummy software event to keep tracking : Ok
perf record modes (nothing new here)
------------------------------------
perf record essentially operates in one of three modes:
per thread
per cpu
workload only
"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
workload).
"per cpu" is selected by -C or -a.
"workload only" mode is selected by not using the other options but providing a
command to run (i.e. the workload).
In per-thread mode an exact list of threads is traced. There is no inheritance.
Each thread has its own event buffer.
In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
option, or processes selected with -p or -u) are traced. Each cpu has its own
buffer. Inheritance is allowed.
In workload-only mode, the workload is traced but with per-cpu buffers.
Inheritance is allowed. Note that you can now trace a workload in per-thread
mode by using the --per-thread option.
Privileged vs non-privileged users
----------------------------------
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
have memory limits imposed upon them. That affects what buffer sizes they can
have as outlined above.
The v4.2 kernel introduced support for a context switch metadata event,
PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
are scheduled out and in, just not by whom, which is left for the
PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
which in turn requires CAP_SYS_ADMIN.
Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
switches") commit, that introduces these metadata events for further info.
When working with kernels < v4.2, the following considerations must be taken,
as the sched:sched_switch tracepoints will be used to receive such information:
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
not permitted to use tracepoints which means there is insufficient side-band
information to decode Intel PT in per-cpu mode, and potentially workload-only
mode too if the workload creates new processes.
Note also, that to use tracepoints, read-access to debugfs is required. So if
debugfs is not mounted or the user does not have read-access, it will again not
be possible to decode Intel PT in per-cpu mode.
sched_switch tracepoint
-----------------------
The sched_switch tracepoint is used to provide side-band data for Intel PT
decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
available.
The sched_switch events are automatically added. e.g. the second event shown
below:
$ perf record -vv -e intel_pt//u uname
------------------------------------------------------------
perf_event_attr:
type 6
size 112
config 0x400
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
enable_on_exec 1
sample_id_all 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
perf_event_attr:
type 2
size 112
config 0x108
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
read_format ID
inherit 1
sample_id_all 1
exclude_guest 1
------------------------------------------------------------
sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
perf_event_attr:
type 1
size 112
config 0x9
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|IDENTIFIER
read_format ID
disabled 1
inherit 1
exclude_kernel 1
exclude_hv 1
mmap 1
comm 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
------------------------------------------------------------
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
mmap size 528384B
AUX area mmap length 4194304
perf event ring buffer mmapped per cpu
Synthesizing auxtrace information
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.042 MB perf.data ]
Note, the sched_switch event is only added if the user is permitted to use it
and only in per-cpu mode.
Note also, the sched_switch event is only added if TSC packets are requested.
That is because, in the absence of timing information, the sched_switch events
cannot be matched against the Intel PT trace.
perf script
===========
By default, perf script will decode trace data found in the perf.data file.
This can be further controlled by new option --itrace.
New --itrace option
-------------------
Having no option is the same as
--itrace
which, in turn, is the same as
--itrace=ibxwpe
The letters are:
i synthesize "instructions" events
b synthesize "branches" events
x synthesize "transactions" events
w synthesize "ptwrite" events
p synthesize "power" events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
s skip initial number of events
"Instructions" events look like they were recorded by "perf record -e
instructions".
"Branches" events look like they were recorded by "perf record -e branches". "c"
and "r" can be combined to get calls and returns.
"Transactions" events correspond to the start or end of transactions. The
'flags' field can be used in perf script to determine whether the event is a
tranasaction start, commit or abort.
Note that "instructions", "branches" and "transactions" events depend on code
flow packets which can be disabled by using the config term "branch=0". Refer
to the config terms section above.
"ptwrite" events record the payload of the ptwrite instruction and whether
"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
recorded only if the "ptw" config term was used. Refer to the config terms
section above. perf script "synth" field displays "ptwrite" information like
this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
used.
"Power" events correspond to power event packets and CBR (core-to-bus ratio)
packets. While CBR packets are always recorded when tracing is enabled, power
event packets are recorded only if the "pwr_evt" config term was used. Refer to
the config terms section above. The power events record information about
C-state changes, whereas CBR is indicative of CPU frequency. perf script
"event,synth" fields display information like this:
cbr: cbr: 22 freq: 2189 MHz (200%)
mwait: hints: 0x60 extensions: 0x1
pwre: hw: 0 cstate: 2 sub-cstate: 0
exstop: ip: 1
pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
Where:
"cbr" includes the frequency and the percentage of maximum non-turbo
"mwait" shows mwait hints and extensions
"pwre" shows C-state transitions (to a C-state deeper than C0) and
whether initiated by hardware
"exstop" indicates execution stopped and whether the IP was recorded
exactly,
"pwrx" indicates return to C0
For more details refer to the Intel 64 and IA-32 Architectures Software
Developer Manuals.
Error events show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
picture or not.
The "d" option will cause the creation of a file "intel_pt.log" containing all
decoded packets and instructions. Note that this option slows down the decoder
and that the resulting file may be very large.
In addition, the period of the "instructions" event can be specified. e.g.
--itrace=i10us
sets the period to 10us i.e. one instruction sample is synthesized for each 10
microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
"ms", "us" and "ns" are converted to TSC ticks.
The timing information included with Intel PT does not give the time of every
instruction. Consequently, for the purpose of sampling, the decoder estimates
the time since the last timing packet based on 1 tick per instruction. The time
on the sample is *not* adjusted and reflects the last known value of TSC.
For Intel PT, the default period is 100us.
Setting it to a zero period means "as often as possible".
In the case of Intel PT that is the same as a period of 1 and a unit of
'instructions' (i.e. --itrace=i1i).
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified. e.g.
--itrace=ig32
--itrace=xg32
Also the number of last branch entries (default 64, max. 1024) for instructions or
transactions events can be specified. e.g.
--itrace=il10
--itrace=xl10
Note that last branch entries are cleared for each sample, so there is no overlap
from one sample to the next.
To disable trace decoding entirely, use the option --no-itrace.
It is also possible to skip events generated (instructions, branches, transactions)
at the beginning. This is useful to ignore initialization code.
--itrace=i0nss1000000
skips the first million instructions.
dump option
-----------
perf script has an option (-D) to "dump" the events i.e. display the binary
data.
When -D is used, Intel PT packets are displayed. The packet decoder does not
pay attention to PSB packets, but just decodes the bytes - so the packets seen
by the actual decoder may not be identical in places where the data is corrupt.
One example of that would be when the buffer-switching interrupt has been too
slow, and the buffer has been filled completely. In that case, the last packet
in the buffer might be truncated and immediately followed by a PSB as the trace
continues in the next buffer.
To disable the display of Intel PT packets, combine the -D option with
--no-itrace.
perf report
===========
By default, perf report will decode trace data found in the perf.data file.
This can be further controlled by new option --itrace exactly the same as
perf script, with the exception that the default is --itrace=igxe.
perf inject
===========
perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new
Below is an example of using Intel PT with autofdo. It requires autofdo
(https://github.com/google/autofdo) and gcc version 5. The bubble
sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
amended to take the number of elements as a parameter.
$ gcc-5 -O3 sort.c -o sort_optimized
$ ./sort_optimized 30000
Bubble sorting array of 30000 elements
2254 ms
$ cat ~/.perfconfig
[intel-pt]
mispred-all = on
$ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements
58 ms
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 3.939 MB perf.data ]
$ perf inject -i perf.data -o inj --itrace=i100usle --strip
$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
$ ./sort_autofdo 30000
Bubble sorting array of 30000 elements
2155 ms
Note there is currently no advantage to using Intel PT instead of LBR, but
that may change in the future if greater use is made of the data.

36
Documentation/itrace.txt Normal file
View file

@ -0,0 +1,36 @@
i synthesize instructions events
b synthesize branches events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
w synthesize ptwrite events
p synthesize power events
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
s skip initial number of events
The default is all events i.e. the same as --itrace=ibxwpe
In addition, the period (default 100000) for instructions events
can be specified in units of:
i instructions
t ticks
ms milliseconds
us microseconds
ns nanoseconds (default)
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified.
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
It is also possible to skip events generated (instructions, branches, transactions,
ptwrite, power) at the beginning. This is useful to ignore initialization code.
--itrace=i0nss1000000
skips the first million instructions.

View file

@ -0,0 +1,15 @@
perf supports a simple JIT interface to resolve symbols for dynamic code generated
by a JIT.
The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file
This is a text file.
Each line has the following format, fields separated with spaces:
START SIZE symbolname
START and SIZE are hex numbers without 0x.
symbolname is the rest of the line, so it could contain special characters.
The ownership of the file has to match the process.

View file

@ -0,0 +1,170 @@
JITDUMP specification version 2
Last Revised: 09/15/2016
Author: Stephane Eranian <eranian@gmail.com>
--------------------------------------------------------
| Revision | Date | Description |
--------------------------------------------------------
| 1 | 09/07/2016 | Initial revision |
--------------------------------------------------------
| 2 | 09/15/2016 | Add JIT_CODE_UNWINDING_INFO |
--------------------------------------------------------
I/ Introduction
This document describes the jitdump file format. The file is generated by Just-In-time compiler runtimes to save meta-data information about the generated code, such as address, size, and name of generated functions, the native code generated, the source line information. The data may then be used by performance tools, such as Linux perf to generate function and assembly level profiles.
The format is not specific to any particular programming language. It can be extended as need be.
The format of the file is binary. It is self-describing in terms of endianness and is portable across multiple processor architectures.
II/ Overview of the format
The format requires only sequential accesses, i.e., append only mode. The file starts with a fixed size file header describing the version of the specification, the endianness.
The header is followed by a series of records, each starting with a fixed size header describing the type of record and its size. It is, itself, followed by the payload for the record. Records can have a variable size even for a given type.
Each entry in the file is timestamped. All timestamps must use the same clock source. The CLOCK_MONOTONIC clock source is recommended.
III/ Jitdump file header format
Each jitdump file starts with a fixed size header containing the following fields in order:
* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
* uint32_t version : a 4-byte value representing the format version. It is currently set to 2
* uint32_t total_size: size in bytes of file header
* uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
* uint32_t pad1 : padding. Reserved for future use
* uint32_t pid : JIT runtime process identification (OS specific)
* uint64_t timestamp : timestamp of when the file was created
* uint64_t flags : a bitmask of flags
The flags currently defined are as follows:
* bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP : set if the jitdump file is using an architecture-specific timestamp clock source. For instance, on x86, one could use TSC directly
IV/ Record header
The file header is immediately followed by records. Each record starts with a fixed size header describing the record that follows.
The record header is specified in order as follows:
* uint32_t id : a value identifying the record type (see below)
* uint32_t total_size: the size in bytes of the record including the header.
* uint64_t timestamp : a timestamp of when the record was created.
The following record types are defined:
* Value 0 : JIT_CODE_LOAD : record describing a jitted function
* Value 1 : JIT_CODE_MOVE : record describing an already jitted function which is moved
* Value 2 : JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
* Value 3 : JIT_CODE_CLOSE : record marking the end of the jit runtime (optional)
* Value 4 : JIT_CODE_UNWINDING_INFO: record describing a function unwinding information
The payload of the record must immediately follow the record header without padding.
V/ JIT_CODE_LOAD record
The record has the following fields following the fixed-size record header in order:
* uint32_t pid: OS process id of the runtime generating the jitted code
* uint32_t tid: OS thread identification of the runtime thread generating the jitted code
* uint64_t vma: virtual address of jitted code start
* uint64_t code_addr: code start address for the jitted code. By default vma = code_addr
* uint64_t code_size: size in bytes of the generated jitted code
* uint64_t code_index: unique identifier for the jitted code (see below)
* char[n]: function name in ASCII including the null termination
* native code: raw byte encoding of the jitted code
The record header total_size field is inclusive of all components:
* record header
* fixed-sized fields
* function name string, including termination
* native code length
* record specific variable data (e.g., array of data entries)
The code_index is used to uniquely identify each jitted function. The index can be a monotonically increasing 64-bit value. Each time a function is jitted it gets a new number. This value is used in case the code for a function is moved and avoids having to issue another JIT_CODE_LOAD record.
The format supports empty functions with no native code.
VI/ JIT_CODE_MOVE record
The record type is optional.
The record has the following fields following the fixed-size record header in order:
* uint32_t pid : OS process id of the runtime generating the jitted code
* uint32_t tid : OS thread identification of the runtime thread generating the jitted code
* uint64_t vma : new virtual address of jitted code start
* uint64_t old_code_addr: previous code address for the same function
* uint64_t new_code_addr: alternate new code started address for the jitted code. By default it should be equal to the vma address.
* uint64_t code_size : size in bytes of the jitted code
* uint64_t code_index : index referring to the JIT_CODE_LOAD code_index record of when the function was initially jitted
The MOVE record can be used in case an already jitted function is simply moved by the runtime inside the code cache.
The JIT_CODE_MOVE record cannot come before the JIT_CODE_LOAD record for the same function name. The function cannot have changed name, otherwise a new JIT_CODE_LOAD record must be emitted.
The code size of the function cannot change.
VII/ JIT_DEBUG_INFO record
The record type is optional.
The record contains source lines debug information, i.e., a way to map a code address back to a source line. This information may be used by the performance tool.
The record has the following fields following the fixed-size record header in order:
* uint64_t code_addr: address of function for which the debug information is generated
* uint64_t nr_entry : number of debug entries for the function
* debug_entry[n]: array of nr_entry debug entries for the function
The debug_entry describes the source line information. It is defined as follows in order:
* uint64_t code_addr: address of function for which the debug information is generated
* uint32_t line : source file line number (starting at 1)
* uint32_t discrim : column discriminator, 0 is default
* char name[n] : source file name in ASCII, including null termination
The debug_entry entries are saved in sequence but given that they have variable sizes due to the file name string, they cannot be indexed directly.
They need to be walked sequentially. The next debug_entry is found at sizeof(debug_entry) + strlen(name) + 1.
IMPORTANT:
The JIT_CODE_DEBUG for a given function must always be generated BEFORE the JIT_CODE_LOAD for the function. This facilitates greatly the parser for the jitdump file.
VIII/ JIT_CODE_CLOSE record
The record type is optional.
The record is used as a marker for the end of the jitted runtime. It can be replaced by the end of the file.
The JIT_CODE_CLOSE record does not have any specific fields, the record header contains all the information needed.
IX/ JIT_CODE_UNWINDING_INFO
The record type is optional.
The record is used to describe the unwinding information for a jitted function.
The record has the following fields following the fixed-size record header in order:
uint64_t unwind_data_size : the size in bytes of the unwinding data table at the end of the record
uint64_t eh_frame_hdr_size : the size in bytes of the DWARF EH Frame Header at the start of the unwinding data table at the end of the record
uint64_t mapped_size : the size of the unwinding data mapped in memory
const char unwinding_data[n]: an array of unwinding data, consisting of the EH Frame Header, followed by the actual EH Frame
The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
The EH Frame follows the LSB specicfication as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.

View file

@ -0,0 +1,14 @@
<!-- manpage-1.72.xsl:
special settings for manpages rendered from asciidoc+docbook
handles peculiarities in docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the special values for the roff control characters
needed for docbook-xsl 1.72.0 -->
<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
</xsl:stylesheet>

View file

@ -0,0 +1,35 @@
<!-- manpage-base.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- these params silence some output from xmlto -->
<xsl:param name="man.output.quietly" select="1"/>
<xsl:param name="refentry.meta.get.quietly" select="1"/>
<!-- convert asciidoc callouts to man page format;
git.docbook.backslash and git.docbook.dot params
must be supplied by another XSL file or other means -->
<xsl:template match="co">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB(',
substring-after(@id,'-'),')',
$git.docbook.backslash,'fR')"/>
</xsl:template>
<xsl:template match="calloutlist">
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>sp&#10;</xsl:text>
<xsl:apply-templates/>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="callout">
<xsl:value-of select="concat(
$git.docbook.backslash,'fB',
substring-after(@arearefs,'-'),
'. ',$git.docbook.backslash,'fR')"/>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.dot"/>
<xsl:text>br&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,17 @@
<!-- manpage-bold-literal.xsl:
special formatting for manpages rendered from asciidoc+docbook -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- render literal text as bold (instead of plain or monospace);
this makes literal text easier to distinguish in manpages
viewed on a tty -->
<xsl:template match="literal">
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fB</xsl:text>
<xsl:apply-templates/>
<xsl:value-of select="$git.docbook.backslash"/>
<xsl:text>fR</xsl:text>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,13 @@
<!-- manpage-normal.xsl:
special settings for manpages rendered from asciidoc+docbook
handles anything we want to keep away from docbook-xsl 1.72.0 -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:import href="manpage-base.xsl"/>
<!-- these are the normal values for the roff control characters -->
<xsl:param name="git.docbook.backslash">\</xsl:param>
<xsl:param name="git.docbook.dot" >.</xsl:param>
</xsl:stylesheet>

View file

@ -0,0 +1,21 @@
<!-- manpage-suppress-sp.xsl:
special settings for manpages rendered from asciidoc+docbook
handles erroneous, inline .sp in manpage output of some
versions of docbook-xsl -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- attempt to work around spurious .sp at the tail of the line
that some versions of docbook stylesheets seem to add -->
<xsl:template match="simpara">
<xsl:variable name="content">
<xsl:apply-templates/>
</xsl:variable>
<xsl:value-of select="normalize-space($content)"/>
<xsl:if test="not(ancestor::authorblurb) and
not(ancestor::personblurb)">
<xsl:text>&#10;&#10;</xsl:text>
</xsl:if>
</xsl:template>
</xsl:stylesheet>

View file

@ -0,0 +1,123 @@
perf-annotate(1)
================
NAME
----
perf-annotate - Read perf.data (created by perf record) and display annotated code
SYNOPSIS
--------
[verse]
'perf annotate' [-i <file> | --input=file] [symbol_name]
DESCRIPTION
-----------
This command reads the input file and displays an annotated version of the
code. If the object file has debug symbols then the source code will be
displayed alongside assembly code.
If there is no debug info in the object, then annotated assembly is displayed.
OPTIONS
-------
-i::
--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
-d::
--dsos=<dso[,dso...]>::
Only consider symbols in these dsos.
-s::
--symbol=<symbol>::
Symbol to annotate.
-f::
--force::
Don't do ownership validation.
-v::
--verbose::
Be more verbose. (Show symbol address, etc)
-q::
--quiet::
Do not show any message. (Suppress -v)
-n::
--show-nr-samples::
Show the number of samples for each symbol
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-k::
--vmlinux=<file>::
vmlinux pathname.
--ignore-vmlinux::
Ignore vmlinux files.
-m::
--modules::
Load module symbols. WARNING: use only with -k and LIVE kernel.
-l::
--print-line::
Print matching source lines (may be slow).
-P::
--full-paths::
Don't shorten the displayed pathnames.
--stdio:: Use the stdio interface.
--stdio2:: Use the stdio2 interface, non-interactive, uses the TUI formatting.
--stdio-color=<mode>::
'always', 'never' or 'auto', allowing configuring color output
via the command line, in addition to via "color.ui" .perfconfig.
Use '--stdio-color always' to generate color even when redirecting
to a pipe or file. Using just '--stdio-color' is equivalent to
using 'always'.
--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
present, as when piping to other commands, the stdio interface is
used. This interfaces starts by centering on the line with more
samples, TAB/UNTAB cycles through the lines with more samples.
--gtk:: Use the GTK interface.
-C::
--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
--asm-raw::
Show raw instruction encoding of assembly instructions.
--show-total-period:: Show a column with the sum of periods.
--source::
Interleave source code with assembly code. Enabled by default,
disable with --no-source.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-M::
--disassembler-style=:: Set disassembler style for objdump.
--objdump=<path>::
Path to objdump binary.
--skip-missing::
Skip symbols that cannot be annotated.
--group::
Show event group information together
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]

View file

@ -0,0 +1,22 @@
perf-archive(1)
===============
NAME
----
perf-archive - Create archive with object files with build-ids found in perf.data file
SYNOPSIS
--------
[verse]
'perf archive' [file]
DESCRIPTION
-----------
This command runs perf-buildid-list --with-hits, and collects the files with the
buildids found so that analysis of perf.data contents can be possible on another
machine.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]

View file

@ -0,0 +1,209 @@
perf-bench(1)
=============
NAME
----
perf-bench - General framework for benchmark suites
SYNOPSIS
--------
[verse]
'perf bench' [<common options>] <subsystem> <suite> [<options>]
DESCRIPTION
-----------
This 'perf bench' command is a general framework for benchmark suites.
COMMON OPTIONS
--------------
-r::
--repeat=::
Specify amount of times to repeat the run (default 10).
-f::
--format=::
Specify format style.
Current available format styles are:
'default'::
Default style. This is mainly for human reading.
---------------------
% perf bench sched pipe # with no style specified
(executing 1000000 pipe operations between two tasks)
Total time:5.855 sec
5.855061 usecs/op
170792 ops/sec
---------------------
'simple'::
This simple style is friendly for automated
processing by scripts.
---------------------
% perf bench --format=simple sched pipe # specified simple
5.988
---------------------
SUBSYSTEM
---------
'sched'::
Scheduler and IPC mechanisms.
'mem'::
Memory access performance.
'numa'::
NUMA scheduling and MM benchmarks.
'futex'::
Futex stressing benchmarks.
'all'::
All benchmark subsystems.
SUITES FOR 'sched'
~~~~~~~~~~~~~~~~~~
*messaging*::
Suite for evaluating performance of scheduler and IPC mechanisms.
Based on hackbench by Rusty Russell.
Options of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
-p::
--pipe::
Use pipe() instead of socketpair()
-t::
--thread::
Be multi thread instead of multi process
-g::
--group=::
Specify number of groups
-l::
--nr_loops=::
Specify number of loops
Example of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched messaging # run with default
options (20 sender and receiver processes per group)
(10 groups == 400 processes run)
Total time:0.308 sec
% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups
(20 sender and receiver threads per group)
(20 groups == 800 threads run)
Total time:0.582 sec
---------------------
*pipe*::
Suite for pipe() system call.
Based on pipe-test-1m.c by Ingo Molnar.
Options of *pipe*
^^^^^^^^^^^^^^^^^
-l::
--loop=::
Specify number of loops.
Example of *pipe*
^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched pipe
(executing 1000000 pipe operations between two tasks)
Total time:8.091 sec
8.091833 usecs/op
123581 ops/sec
% perf bench sched pipe -l 1000 # loop 1000
(executing 1000 pipe operations between two tasks)
Total time:0.016 sec
16.948000 usecs/op
59004 ops/sec
---------------------
SUITES FOR 'mem'
~~~~~~~~~~~~~~~~
*memcpy*::
Suite for evaluating performance of simple memory copy in various ways.
Options of *memcpy*
^^^^^^^^^^^^^^^^^^^
-l::
--size::
Specify size of memory to copy (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
-f::
--function::
Specify function to copy (default: default).
Available functions are depend on the architecture.
On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
-l::
--nr_loops::
Repeat memcpy invocation this number of times.
-c::
--cycles::
Use perf's cpu-cycles event instead of gettimeofday syscall.
*memset*::
Suite for evaluating performance of simple memory set in various ways.
Options of *memset*
^^^^^^^^^^^^^^^^^^^
-l::
--size::
Specify size of memory to set (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
-f::
--function::
Specify function to set (default: default).
Available functions are depend on the architecture.
On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
-l::
--nr_loops::
Repeat memset invocation this number of times.
-c::
--cycles::
Use perf's cpu-cycles event instead of gettimeofday syscall.
SUITES FOR 'numa'
~~~~~~~~~~~~~~~~~
*mem*::
Suite for evaluating NUMA workloads.
SUITES FOR 'futex'
~~~~~~~~~~~~~~~~~~
*hash*::
Suite for evaluating hash tables.
*wake*::
Suite for evaluating wake calls.
*wake-parallel*::
Suite for evaluating parallel wake calls.
*requeue*::
Suite for evaluating requeue calls.
*lock-pi*::
Suite for evaluating futex lock_pi calls.
SEE ALSO
--------
linkperf:perf[1]

View file

@ -0,0 +1,74 @@
perf-buildid-cache(1)
=====================
NAME
----
perf-buildid-cache - Manage build-id cache.
SYNOPSIS
--------
[verse]
'perf buildid-cache <options>'
DESCRIPTION
-----------
This command manages the build-id cache. It can add, remove, update and purge
files to/from the cache. In the future it should as well set upper limits for
the space used by the cache, etc.
This also scans the target binary for SDT (Statically Defined Tracing) and
record it along with the buildid-cache, which will be used by perf-probe.
For more details, see linkperf:perf-probe[1].
OPTIONS
-------
-a::
--add=::
Add specified file to the cache.
-f::
--force::
Don't complain, do it.
-k::
--kcore::
Add specified kcore file to the cache. For the current host that is
/proc/kcore which requires root permissions to read. Be aware that
running 'perf buildid-cache' as root may update root's build-id cache
not the user's. Use the -v option to see where the file is created.
Note that the copied file contains only code sections not the whole core
image. Note also that files "kallsyms" and "modules" must also be in the
same directory and are also copied. All 3 files are created with read
permissions for root only. kcore will not be added if there is already a
kcore in the cache (with the same build-id) that has the same modules at
the same addresses. Use the -v option to see if a copy of kcore is
actually made.
-r::
--remove=::
Remove a cached binary which has same build-id of specified file
from the cache.
-p::
--purge=::
Purge all cached binaries including older caches which have specified
path from the cache.
-M::
--missing=::
List missing build ids in the cache for the specified file.
-u::
--update=::
Update specified file of the cache. Note that this doesn't remove
older entires since those may be still needed for annotating old
(or remote) perf.data. Only if there is already a cache which has
exactly same build-id, that is replaced by new one. It can be used
to update kallsyms and kernel dso to vmlinux in order to support
annotation.
-v::
--verbose::
Be more verbose.
--target-ns=PID:
Obtain mount namespace information from the target pid. This is
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]

View file

@ -0,0 +1,43 @@
perf-buildid-list(1)
====================
NAME
----
perf-buildid-list - List the buildids in a perf.data file
SYNOPSIS
--------
[verse]
'perf buildid-list <options>'
DESCRIPTION
-----------
This command displays the buildids found in a perf.data file, so that other
tools can be used to fetch packages with matching symbol tables for use by
perf report.
It can also be used to show the build id of the running kernel or in an ELF
file using -i/--input.
OPTIONS
-------
-H::
--with-hits::
Show only DSOs with hits.
-i::
--input=::
Input file name. (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't do ownership validation.
-k::
--kernel::
Show running kernel build id.
-v::
--verbose::
Be more verbose.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-top[1],
linkperf:perf-report[1]

290
Documentation/perf-c2c.txt Normal file
View file

@ -0,0 +1,290 @@
perf-c2c(1)
===========
NAME
----
perf-c2c - Shared Data C2C/HITM Analyzer.
SYNOPSIS
--------
[verse]
'perf c2c record' [<options>] <command>
'perf c2c record' [<options>] -- [<record command options>] <command>
'perf c2c report' [<options>]
DESCRIPTION
-----------
C2C stands for Cache To Cache.
The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
you to track down the cacheline contentions.
The tool is based on x86's load latency and precise store facility events
provided by Intel CPUs. These events provide:
- memory address of the access
- type of the access (load and store details)
- latency (in cycles) of the load access
The c2c tool provide means to record this data and report back access details
for cachelines with highest contention - highest number of HITM accesses.
The basic workflow with this tool follows the standard record/report phase.
User uses the record command to record events data and report command to
display it.
RECORD OPTIONS
--------------
-e::
--event=::
Select the PMU event. Use 'perf mem record -e list'
to list available events.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
-l::
--ldlat::
Configure mem-loads latency.
-k::
--all-kernel::
Configure all used events to run in kernel space.
-u::
--all-user::
Configure all used events to run in user space.
REPORT OPTIONS
--------------
-k::
--vmlinux=<file>::
vmlinux pathname
-v::
--verbose::
Be more verbose (show counter open errors, etc).
-i::
--input::
Specify the input file to process.
-N::
--node-info::
Show extra node info in report (see NODE INFO section)
-c::
--coalesce::
Specify sorting fields for single cacheline display.
Following fields are available: tid,pid,iaddr,dso
(see COALESCE)
-g::
--call-graph::
Setup callchains parameters.
Please refer to perf-report man page for details.
--stdio::
Force the stdio output (see STDIO OUTPUT)
--stats::
Display only statistic tables and force stdio mode.
--full-symbols::
Display full length of symbols.
--no-source::
Do not display Source:Line column.
--show-all::
Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
-f::
--force::
Don't do ownership validation.
-d::
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
and calls standard perf record command.
Following perf record options are configured by default:
(check perf record man page for details)
-W,-d,--phys-data,--sample-cpu
Unless specified otherwise with '-e' option, following events are monitored by
default:
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
User can pass any 'perf record' option behind '--' mark, like (to enable
callchains and system wide monitoring):
$ perf c2c record -- -g -a
Please check RECORD OPTIONS section for specific c2c record options.
C2C REPORT
----------
The perf c2c report command displays shared data analysis. It comes in two
display modes: stdio and tui (default).
The report command workflow is following:
- sort all the data based on the cacheline address
- store access details for each cacheline
- sort all cachelines based on user settings
- display data
In general perf report output consist of 2 basic views:
1) most expensive cachelines list
2) offsets details for each cacheline
For each cacheline in the 1) list we display following data:
(Both stdio and TUI modes follow the same fields output)
Index
- zero based index to identify the cacheline
Cacheline
- cacheline address (hex number)
Total records
- sum of all cachelines accesses
Rmt/Lcl Hitm
- cacheline percentage of all Remote/Local HITM accesses
LLC Load Hitm - Total, Lcl, Rmt
- count of Total/Local/Remote load HITMs
Store Reference - Total, L1Hit, L1Miss
Total - all store accesses
L1Hit - store accesses that hit L1
L1Hit - store accesses that missed L1
Load Dram
- count of local and remote DRAM accesses
LLC Ld Miss
- count of all accesses that missed LLC
Total Loads
- sum of all load accesses
Core Load Hit - FB, L1, L2
- count of load hits in FB (Fill Buffer), L1 and L2 cache
LLC Load Hit - Llc, Rmt
- count of LLC and Remote load hits
For each offset in the 2) list we display following data:
HITM - Rmt, Lcl
- % of Remote/Local HITM accesses for given offset within cacheline
Store Refs - L1 Hit, L1 Miss
- % of store accesses that hit/missed L1 for given offset within cacheline
Data address - Offset
- offset address
Pid
- pid of the process responsible for the accesses
Tid
- tid of the process responsible for the accesses
Code address
- code address responsible for the accesses
cycles - rmt hitm, lcl hitm, load
- sum of cycles for given accesses - Remote/Local HITM and generic load
cpu cnt
- number of cpus that participated on the access
Symbol
- code symbol related to the 'Code address' value
Shared Object
- shared object name related to the 'Code address' value
Source:Line
- source information related to the 'Code address' value
Node
- nodes participating on the access (see NODE INFO section)
NODE INFO
---------
The 'Node' field displays nodes that accesses given cacheline
offset. Its output comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
- node IDs with list of affected CPUs in following format:
Node{cpu list}
User can switch between above flavors with -N option or
use 'n' key to interactively switch in TUI mode.
COALESCE
--------
User can specify how to sort offsets for cacheline.
Following fields are available and governs the final
output fields set for caheline offsets output:
tid - coalesced by process TIDs
pid - coalesced by process PIDs
iaddr - coalesced by code address, following fields are displayed:
Code address, Code symbol, Shared Object, Source line
dso - coalesced by shared object
By default the coalescing is setup with 'pid,iaddr'.
STDIO OUTPUT
------------
The stdio output displays data on standard output.
Following tables are displayed:
Trace Event Information
- overall statistics of memory accesses
Global Shared Cache Line Event Information
- overall statistics on shared cachelines
Shared Data Cache Line Table
- list of most expensive cachelines
Shared Cache Line Distribution Pareto
- list of all accessed offsets for each cacheline
TUI OUTPUT
----------
The TUI output provides interactive interface to navigate
through cachelines list and to display offset details.
For details please refer to the help window by pressing '?' key.
CREDITS
-------
Although Don Zickus, Dick Fowles and Joe Mario worked together
to get this implemented, we got lots of early help from Arnaldo
Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
C2C BLOG
--------
Check Joe's blog on c2c tool for detailed use case explanation:
https://joemario.github.io/blog/2016/09/01/c2c-blog/
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-mem[1]

View file

@ -0,0 +1,520 @@
perf-config(1)
==============
NAME
----
perf-config - Get and set variables in a configuration file.
SYNOPSIS
--------
[verse]
'perf config' [<file-option>] [section.name[=value] ...]
or
'perf config' [<file-option>] -l | --list
DESCRIPTION
-----------
You can manage variables in a configuration file with this command.
OPTIONS
-------
-l::
--list::
Show current config variables, name and value, for all sections.
--user::
For writing and reading options: write to user
'$HOME/.perfconfig' file or read it.
--system::
For writing and reading options: write to system-wide
'$(sysconfdir)/perfconfig' or read it.
CONFIGURATION FILE
------------------
The perf configuration file contains many variables to change various
aspects of each of its tools, including output, disk usage, etc.
The '$HOME/.perfconfig' file is used to store a per-user configuration.
The file '$(sysconfdir)/perfconfig' can be used to
store a system-wide default configuration.
When reading or writing, the values are read from the system and user
configuration files by default, and options '--system' and '--user'
can be used to tell the command to read from or write to only that location.
Syntax
~~~~~~
The file consist of sections. A section starts with its name
surrounded by square brackets and continues till the next section
begins. Each variable must be in a section, and have the form
'name = value', for example:
[section]
name1 = value1
name2 = value2
Section names are case sensitive and can contain any characters except
newline (double quote `"` and backslash have to be escaped as `\"` and `\\`,
respectively). Section headers can't span multiple lines.
Example
~~~~~~~
Given a $HOME/.perfconfig like this:
#
# This is the config file, and
# a '#' and ';' character indicates a comment
#
[colors]
# Color variables
top = red, default
medium = green, default
normal = lightgray, default
selected = white, lightgray
jump_arrows = blue, default
addr = magenta, default
root = white, blue
[tui]
# Defaults if linked with libslang
report = on
annotate = on
top = on
[buildid]
# Default, disable using /dev/null
dir = ~/.debug
[annotate]
# Defaults
hide_src_code = false
use_offset = true
jump_arrows = true
show_nr_jumps = false
[help]
# Format can be man, info, web or html
format = man
autocorrect = 0
[ui]
show-headers = true
[call-graph]
# fp (framepointer), dwarf
record-mode = fp
print-type = graph
order = caller
sort-key = function
[report]
# Defaults
sort-order = comm,dso,symbol
percent-limit = 0
queue-size = 0
children = true
group = true
You can hide source code of annotate feature setting the config to false with
% perf config annotate.hide_src_code=true
If you want to add or modify several config items, you can do like
% perf config ui.show-headers=false kmem.default=slab
To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
% perf config --user report sort-order=srcline
To change colors of selected line to other foreground and background colors
in system config file (i.e. `$(sysconf)/perfconfig`), do
% perf config --system colors.selected=yellow,green
To query the record mode of call graph, do
% perf config call-graph.record-mode
If you want to know multiple config key/value pairs, you can do like
% perf config report.queue-size call-graph.order report.children
To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do
% perf config --user call-graph.sort-order
To query the config value of buildid directory in system config file (i.e. `$(sysconf)/perfconfig`), do
% perf config --system buildid.dir
Variables
~~~~~~~~~
colors.*::
The variables for customizing the colors used in the output for the
'report', 'top' and 'annotate' in the TUI. They should specify the
foreground and background colors, separated by a comma, for example:
medium = green, lightgray
If you want to use the color configured for you terminal, just leave it
as 'default', for example:
medium = default, lightgray
Available colors:
red, yellow, green, cyan, gray, black, blue,
white, default, magenta, lightgray
colors.top::
'top' means a overhead percentage which is more than 5%.
And values of this variable specify percentage colors.
Basic key values are foreground-color 'red' and
background-color 'default'.
colors.medium::
'medium' means a overhead percentage which has more than 0.5%.
Default values are 'green' and 'default'.
colors.normal::
'normal' means the rest of overhead percentages
except 'top', 'medium', 'selected'.
Default values are 'lightgray' and 'default'.
colors.selected::
This selects the colors for the current entry in a list of entries
from sub-commands (top, report, annotate).
Default values are 'black' and 'lightgray'.
colors.jump_arrows::
Colors for jump arrows on assembly code listings
such as 'jns', 'jmp', 'jane', etc.
Default values are 'blue', 'default'.
colors.addr::
This selects colors for addresses from 'annotate'.
Default values are 'magenta', 'default'.
colors.root::
Colors for headers in the output of a sub-commands (top, report).
Default values are 'white', 'blue'.
tui.*, gtk.*::
Subcommands that can be configured here are 'top', 'report' and 'annotate'.
These values are booleans, for example:
[tui]
top = true
will make the TUI be the default for the 'top' subcommand. Those will be
available if the required libs were detected at tool build time.
buildid.*::
buildid.dir::
Each executable and shared library in modern distributions comes with a
content based identifier that, if available, will be inserted in a
'perf.data' file header to, at analysis time find what is needed to do
symbol resolution, code annotation, etc.
The recording tools also stores a hard link or copy in a per-user
directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms
and /proc/kcore files to be used at analysis time.
The buildid.dir variable can be used to either change this directory
cache location, or to disable it altogether. If you want to disable it,
set buildid.dir to /dev/null. The default is $HOME/.debug
annotate.*::
These options work only for TUI.
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
annotate.hide_src_code::
If a program which is analyzed has source code,
this option lets 'annotate' print a list of assembly code with the source code.
For example, let's see a part of a program. There're four lines.
If this option is 'true', they can be printed
without source code from a program as below.
│ push %rbp
│ mov %rsp,%rbp
│ sub $0x10,%rsp
│ mov (%rdi),%rdx
But if this option is 'false', source code of the part
can be also printed as below. Default is 'false'.
│ struct rb_node *rb_next(const struct rb_node *node)
│ {
│ push %rbp
│ mov %rsp,%rbp
│ sub $0x10,%rsp
│ struct rb_node *parent;
│ if (RB_EMPTY_NODE(node))
│ mov (%rdi),%rdx
│ return n;
annotate.use_offset::
Basing on a first address of a loaded function, offset can be used.
Instead of using original addresses of assembly code,
addresses subtracted from a base address can be printed.
Let's illustrate an example.
If a base address is 0XFFFFFFFF81624d50 as below,
ffffffff81624d50 <load0>
an address on assembly code has a specific absolute address as below
ffffffff816250b8:│ mov 0x8(%r14),%rdi
but if use_offset is 'true', an address subtracted from a base address is printed.
Default is true. This option is only applied to TUI.
368:│ mov 0x8(%r14),%rdi
annotate.jump_arrows::
There can be jump instruction among assembly code.
Depending on a boolean value of jump_arrows,
arrows can be printed or not which represent
where do the instruction jump into as below.
│ ┌──jmp 1333
│ │ xchg %ax,%ax
│1330:│ mov %r15,%r10
│1333:└─→cmp %r15,%r14
If jump_arrow is 'false', the arrows isn't printed as below.
Default is 'false'.
│ ↓ jmp 1333
│ xchg %ax,%ax
│1330: mov %r15,%r10
│1333: cmp %r15,%r14
annotate.show_linenr::
When showing source code if this option is 'true',
line numbers are printed as below.
│1628 if (type & PERF_SAMPLE_IDENTIFIER) {
│ ↓ jne 508
│1628 data->id = *array;
│1629 array++;
│1630 }
However if this option is 'false', they aren't printed as below.
Default is 'false'.
│ if (type & PERF_SAMPLE_IDENTIFIER) {
│ ↓ jne 508
│ data->id = *array;
│ array++;
│ }
annotate.show_nr_jumps::
Let's see a part of assembly code.
│1382: movb $0x1,-0x270(%rbp)
If use this, the number of branches jumping to that address can be printed as below.
Default is 'false'.
│1 1382: movb $0x1,-0x270(%rbp)
annotate.show_total_period::
To compare two records on an instruction base, with this option
provided, display total number of samples that belong to a line
in assembly code. If this option is 'true', total periods are printed
instead of percent values as below.
302 │ mov %eax,%eax
But if this option is 'false', percent values for overhead are printed i.e.
Default is 'false'.
99.93 │ mov %eax,%eax
annotate.offset_level::
Default is '1', meaning just jump targets will have offsets show right beside
the instruction. When set to '2' 'call' instructions will also have its offsets
shown, 3 or higher will show offsets for all instructions.
hist.*::
hist.percentage::
This option control the way to calculate overhead of filtered entries -
that means the value of this option is effective only if there's a
filter (by comm, dso or symbol name). Suppose a following example:
Overhead Symbols
........ .......
33.33% foo
33.33% bar
33.33% baz
This is an original overhead and we'll filter out the first 'foo'
entry. The value of 'relative' would increase the overhead of 'bar'
and 'baz' to 50.00% for each, while 'absolute' would show their
current overhead (33.33%).
ui.*::
ui.show-headers::
This option controls display of column headers (like 'Overhead' and 'Symbol')
in 'report' and 'top'. If this option is false, they are hidden.
This option is only applied to TUI.
call-graph.*::
When sub-commands 'top' and 'report' work with -g/—-children
there're options in control of call-graph.
call-graph.record-mode::
The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
The value of 'dwarf' is effective only if perf detect needed library
(libunwind or a recent version of libdw).
'lbr' only work for cpus that support it.
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
When using dwarf into record-mode, the default size will be used if omitted.
call-graph.print-type::
The print-types can be graph (graph absolute), fractal (graph relative),
flat and folded. This option controls a way to show overhead for each callchain
entry. Suppose a following example.
Overhead Symbols
........ .......
40.00% foo
|
---foo
|
|--50.00%--bar
| main
|
--50.00%--baz
main
This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly
half and half so 'fractal' shows 50.00% for each
(meaning that it assumes 100% total overhead of 'foo').
The 'graph' uses absolute overhead value of 'foo' as total so each of
'bar' and 'baz' callchain will have 20.00% of overhead.
If 'flat' is used, single column and linear exposure of call chains.
'folded' mean call chains are displayed in a line, separated by semicolons.
call-graph.order::
This option controls print order of callchains. The default is
'callee' which means callee is printed at top and then followed by its
caller and so on. The 'caller' prints it in reverse order.
If this option is not set and report.children or top.children is
set to true (or the equivalent command line option is given),
the default value of this option is changed to 'caller' for the
execution of 'perf report' or 'perf top'. Other commands will
still default to 'callee'.
call-graph.sort-key::
The callchains are merged if they contain same information.
The sort-key option determines a way to compare the callchains.
A value of 'sort-key' can be 'function' or 'address'.
The default is 'function'.
call-graph.threshold::
When there're many callchains it'd print tons of lines. So perf omits
small callchains under a certain overhead (threshold) and this option
control the threshold. Default is 0.5 (%). The overhead is calculated
by value depends on call-graph.print-type.
call-graph.print-limit::
This is a maximum number of lines of callchain printed for a single
histogram entry. Default is 0 which means no limitation.
report.*::
report.sort_order::
Allows changing the default sort order from "comm,dso,symbol" to
some other default, for instance "sym,dso" may be more fitting for
kernel developers.
report.percent-limit::
This one is mostly the same as call-graph.threshold but works for
histogram entries. Entries having an overhead lower than this
percentage will not be printed. Default is '0'. If percent-limit
is '10', only entries which have more than 10% of overhead will be
printed.
report.queue-size::
This option sets up the maximum allocation size of the internal
event queue for ordering events. Default is 0, meaning no limit.
report.children::
'Children' means functions called from another function.
If this option is true, 'perf report' cumulates callchains of children
and show (accumulated) total overhead as well as 'Self' overhead.
Please refer to the 'perf report' manual. The default is 'true'.
report.group::
This option is to show event group information together.
Example output with this turned on, notice that there is one column
per event in the group, ref-cycles and cycles:
# group: {ref-cycles,cycles}
# ========
#
# Samples: 7K of event 'anon group { ref-cycles, cycles }'
# Event count (approx.): 6876107743
#
# Overhead Command Shared Object Symbol
# ................ ....... ................. ...................
#
99.84% 99.76% noploop noploop [.] main
0.07% 0.00% noploop ld-2.15.so [.] strcmp
0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del
top.*::
top.children::
Same as 'report.children'. So if it is enabled, the output of 'top'
command will have 'Children' overhead column as well as 'Self' overhead
column by default.
The default is 'true'.
man.*::
man.viewer::
This option can assign a tool to view manual pages when 'help'
subcommand was invoked. Supported tools are 'man', 'woman'
(with emacs client) and 'konqueror'. Default is 'man'.
New man viewer tool can be also added using 'man.<tool>.cmd'
or use different path using 'man.<tool>.path' config option.
pager.*::
pager.<subcommand>::
When the subcommand is run on stdio, determine whether it uses
pager or not based on this value. Default is 'unspecified'.
kmem.*::
kmem.default::
This option decides which allocator is to be analyzed if neither
'--slab' nor '--page' option is used. Default is 'slab'.
record.*::
record.build-id::
This option can be 'cache', 'no-cache' or 'skip'.
'cache' is to post-process data and save/update the binaries into
the build-id cache (in ~/.debug). This is the default.
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
diff.*::
diff.order::
This option sets the number of columns to sort the result.
The default is 0, which means sorting by baseline.
Setting it to 1 will sort the result by delta (or other
compute method selected).
diff.compute::
This options sets the method for computing the diff result.
Possible values are 'delta', 'delta-abs', 'ratio' and
'wdiff'. Default is 'delta'.
SEE ALSO
--------
linkperf:perf[1]

View file

@ -0,0 +1,48 @@
perf-data(1)
============
NAME
----
perf-data - Data file related processing
SYNOPSIS
--------
[verse]
'perf data' [<common options>] <command> [<options>]",
DESCRIPTION
-----------
Data file related processing.
COMMANDS
--------
convert::
Converts perf data file into another format (only CTF [1] format is support by now).
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
OPTIONS for 'convert'
---------------------
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
-i::
Specify input perf data file path.
-f::
--force::
Don't complain, do it.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
--all::
Convert all events, including non-sample events (comm, fork, ...), to output.
Default is off, only convert samples.
SEE ALSO
--------
linkperf:perf[1]
[1] Common Trace Format - http://www.efficios.com/ctf

227
Documentation/perf-diff.txt Normal file
View file

@ -0,0 +1,227 @@
perf-diff(1)
============
NAME
----
perf-diff - Read perf.data files and display the differential profile
SYNOPSIS
--------
[verse]
'perf diff' [baseline file] [data file1] [[data file2] ... ]
DESCRIPTION
-----------
This command displays the performance difference amongst two or more perf.data
files captured via perf record.
If no parameters are passed it will assume perf.data.old and perf.data.
The differential profile is displayed only for events matching both
specified perf.data files.
If no parameters are passed the samples will be sorted by dso and symbol.
As the perf.data files could come from different binaries, the symbols addresses
could vary. So perf diff is based on the comparison of the files and
symbols name.
OPTIONS
-------
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
--kallsyms=<file>::
kallsyms pathname
-m::
--modules::
Load module symbols. WARNING: use only with -k and LIVE kernel
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
Use a special separator character and don't pad with spaces, replacing
all occurrences of this separator in symbol names (and other output)
with a '.' character, that thus it's the only non valid separator.
-v::
--verbose::
Be verbose, for instance, show the raw counts in addition to the
diff.
-q::
--quiet::
Do not show any message. (Suppress -v)
-f::
--force::
Don't do ownership validation.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-b::
--baseline-only::
Show only items with match in baseline.
-c::
--compute::
Differential computation selection - delta, ratio, wdiff, delta-abs
(default is delta-abs). Default can be changed using diff.compute
config option. See COMPARISON METHODS section for more info.
-p::
--period::
Show period values for both compared hist entries.
-F::
--formula::
Show formula for given computation.
-o::
--order::
Specify compute sorting column number. 0 means sorting by baseline
overhead and 1 (default) means sorting by computed value of column 1
(data from the first file other base baseline). Values more than 1
can be used only if enough data files are provided.
The default value can be set using the diff.order config option.
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options.
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
file is iterated for samples. All other perf.data files specified on
the command line are searched for the baseline sample pair. If the pair
is found, specified computation is made and result is displayed.
All samples from non-baseline perf.data files, that do not match any
baseline entry, are displayed with empty space within baseline column
and possible computation results (delta) in their related column.
Example files samples:
- file A with samples f1, f2, f3, f4, f6
- file B with samples f2, f4, f5
- file C with samples f1, f2, f5
Example output:
x - computation takes place for pair
b - baseline sample percentage
- perf diff A B C
baseline/A compute/B compute/C samples
---------------------------------------
b x f1
b x x f2
b f3
b x f4
b f6
x x f5
- perf diff B A C
baseline/B compute/A compute/C samples
---------------------------------------
b x x f2
b x f4
b x f5
x x f1
x f3
x f6
- perf diff C B A
baseline/C compute/B compute/A samples
---------------------------------------
b x f1
b x x f2
b x f5
x f3
x x f4
x f6
COMPARISON METHODS
------------------
delta
~~~~~
If specified the 'Delta' column is displayed with value 'd' computed as:
d = A->period_percent - B->period_percent
with:
- A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period_percent being the % of the hist entry period value within
single data file
- with filtering by -C, -d and/or -S, period_percent might be changed
relative to how entries are filtered. Use --percentage=absolute to
prevent such fluctuation.
delta-abs
~~~~~~~~~
Same as 'delta` method, but sort the result with the absolute values.
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
r = A->period / B->period
with:
- A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period being the hist entry period value
wdiff:WEIGHT-B,WEIGHT-A
~~~~~~~~~~~~~~~~~~~~~~~
If specified the 'Weighted diff' column is displayed with value 'd' computed as:
d = B->period * WEIGHT-A - A->period * WEIGHT-B
- A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period being the hist entry period value
- WEIGHT-A/WEIGHT-B being user supplied weights in the the '-c' option
behind ':' separator like '-c wdiff:1,2'.
- WEIGHT-A being the weight of the data file
- WEIGHT-B being the weight of the baseline data file
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]

View file

@ -0,0 +1,45 @@
perf-evlist(1)
==============
NAME
----
perf-evlist - List the event names in a perf.data file
SYNOPSIS
--------
[verse]
'perf evlist <options>'
DESCRIPTION
-----------
This command displays the names of events sampled in a perf.data file.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't complain, do it.
-F::
--freq=::
Show just the sample frequency used for each event.
-v::
--verbose=::
Show all fields.
-g::
--group::
Show event group information.
--trace-fields::
Show tracepoint field names.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-list[1],
linkperf:perf-report[1]

View file

@ -0,0 +1,87 @@
perf-ftrace(1)
==============
NAME
----
perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
'perf ftrace' <command>
DESCRIPTION
-----------
The 'perf ftrace' command is a simple wrapper of kernel's ftrace
functionality. It only supports single thread tracing currently and
just reads trace_pipe in text and then write it to stdout.
The following options apply to perf ftrace.
OPTIONS
-------
-t::
--tracer=::
Tracer to use: function_graph or function.
-v::
--verbose=::
Verbosity level.
-p::
--pid=::
Trace on existing process id (comma separated list).
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
normally use -a by default, while scripts run with a <command>
normally don't - this option allows the latter to be run in
system-wide mode.
-C::
--cpu=::
Only trace for the list of CPUs provided. Multiple CPUs can
be provided as a comma separated list with no space like: 0,1.
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
-T::
--trace-funcs=::
Only trace functions given by the argument. Multiple functions
can be given by using this option more than once. The function
argument also can be a glob pattern. It will be passed to
'set_ftrace_filter' in tracefs.
-N::
--notrace-funcs=::
Do not trace functions given by the argument. Like -T option,
this can be used more than once to specify multiple functions
(or glob patterns). It will be passed to 'set_ftrace_notrace'
in tracefs.
-G::
--graph-funcs=::
Set graph filter on the given function (or a glob pattern).
This is useful for the function_graph tracer only and enables
tracing for functions executed from the given function.
This can be used more than once to specify multiple functions.
It will be passed to 'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
Set graph notrace filter on the given function (or a glob pattern).
Like -G option, this is useful for the function_graph tracer only
and disables tracing for function executed from the given function.
This can be used more than once to specify multiple functions.
It will be passed to 'set_graph_notrace' in tracefs.
-D::
--graph-depth=::
Set max depth for function graph tracer to follow
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]

View file

@ -0,0 +1,38 @@
perf-help(1)
============
NAME
----
perf-help - display help information about perf
SYNOPSIS
--------
'perf help' [-a|--all] [COMMAND]
DESCRIPTION
-----------
With no options and no COMMAND given, the synopsis of the 'perf'
command and a list of the most commonly used perf commands are printed
on the standard output.
If the option '--all' or '-a' is given, then all available commands are
printed on the standard output.
If a perf command is named, a manual page for that command is brought
up. The 'man' program is used by default for this purpose, but this
can be overridden by other options or configuration variables.
Note that `perf --help ...` is identical to `perf help ...` because the
former is internally converted into the latter.
OPTIONS
-------
-a::
--all::
Prints all the available commands on the standard output. This
option supersedes any other option.
PERF
----
Part of the linkperf:perf[1] suite

View file

@ -0,0 +1,69 @@
perf-inject(1)
==============
NAME
----
perf-inject - Filter to augment the events stream with additional information
SYNOPSIS
--------
[verse]
'perf inject <options>'
DESCRIPTION
-----------
perf-inject reads a perf-record event stream and repipes it to stdout. At any
point the processing code can inject other events into the event stream - in
this case build-ids (-b option) are read and injected as needed into the event
stream.
Build-ids are just the first user of perf-inject - potentially anything that
needs userspace processing to augment the events stream with additional
information could make use of this facility.
OPTIONS
-------
-b::
--build-ids=::
Inject build-ids into the output stream
-v::
--verbose::
Be more verbose.
-i::
--input=::
Input file name. (default: stdin)
-o::
--output=::
Output file name. (default: stdout)
-s::
--sched-stat::
Merge sched_stat and sched_switch for getting events where and how long
tasks slept. sched_switch contains a callchain where a task slept and
sched_stat contains a timeslice how long a task slept.
--kallsyms=<file>::
kallsyms pathname
--itrace::
Decode Instruction Tracing data, replacing it with synthesized events.
Options are:
include::itrace.txt[]
--strip::
Use with --itrace to strip out non-synthesized events.
-j::
--jit::
Process jitdump files by injecting the mmap records corresponding to jitted
functions. This option also generates the ELF images for each jitted function
found in the jitdumps files captured in the input perf.data file. Use this option
if you are monitoring environment using JIT runtimes, such as Java, DART or V8.
-f::
--force::
Don't complain, do it.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]

View file

@ -0,0 +1,24 @@
perf-kallsyms(1)
================
NAME
----
perf-kallsyms - Searches running kernel for symbols
SYNOPSIS
--------
[verse]
'perf kallsyms' [<options>] symbol_name[,symbol_name...]
DESCRIPTION
-----------
This command searches the running kernel kallsyms file for the given symbol(s)
and prints information about it, including the DSO, the kallsyms begin/end
addresses and the addresses in the ELF kallsyms symbol table (for symbols in
modules).
OPTIONS
-------
-v::
--verbose=::
Increase verbosity level, showing details about symbol table loading, etc.

View file

@ -0,0 +1,77 @@
perf-kmem(1)
============
NAME
----
perf-kmem - Tool to trace/measure kernel memory properties
SYNOPSIS
--------
[verse]
'perf kmem' {record|stat} [<options>]
DESCRIPTION
-----------
There are two variants of perf kmem:
'perf kmem record <command>' to record the kmem events
of an arbitrary workload.
'perf kmem stat' to report kernel memory statistics.
OPTIONS
-------
-i <file>::
--input=<file>::
Select the input file (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't do ownership validation
-v::
--verbose::
Be more verbose. (show symbol address, etc)
--caller::
Show per-callsite statistics
--alloc::
Show per-allocation statistics
-s <key[,key2...]>::
--sort=<key[,key2...]>::
Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
for page). Available sort keys are 'ptr, callsite, bytes, hit,
pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
migtype, gfp' for page. This option should be preceded by one of the
mode selection options - i.e. --slab, --page, --alloc and/or --caller.
-l <num>::
--line=<num>::
Print n lines only
--raw-ip::
Print raw ip instead of symbol
--slab::
Analyze SLAB allocator events.
--page::
Analyze page allocator events
--live::
Show live page stat. The perf kmem shows total allocation stat by
default, but this option shows live (currently allocated) pages
instead. (This option works with --page option only)
--time=<start>,<stop>::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
SEE ALSO
--------
linkperf:perf-record[1]

164
Documentation/perf-kvm.txt Normal file
View file

@ -0,0 +1,164 @@
perf-kvm(1)
===========
NAME
----
perf-kvm - Tool to trace/measure kvm guest os
SYNOPSIS
--------
[verse]
'perf kvm' [--host] [--guest] [--guestmount=<path>
[--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
{top|record|report|diff|buildid-list} [<options>]
'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
| --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
'perf kvm stat [record|report|live] [<options>]
DESCRIPTION
-----------
There are a couple of variants of perf kvm:
'perf kvm [options] top <command>' to generates and displays
a performance counter profile of guest os in realtime
of an arbitrary workload.
'perf kvm record <command>' to record the performance counter profile
of an arbitrary workload and save it into a perf data file. We set the
default behavior of perf kvm as --guest, so if neither --host nor --guest
is input, the perf data file name is perf.data.guest. If --host is input,
the perf data file name is perf.data.kvm. If you want to record data into
perf.data.host, please input --host --no-guest. The behaviors are shown as
following:
Default('') -> perf.data.guest
--host -> perf.data.kvm
--guest -> perf.data.guest
--host --guest -> perf.data.kvm
--host --no-guest -> perf.data.host
'perf kvm report' to display the performance counter profile information
recorded via perf kvm record.
'perf kvm diff' to displays the performance difference amongst two perf.data
files captured via perf record.
'perf kvm buildid-list' to display the buildids found in a perf data file,
so that other tools can be used to fetch packages with matching symbol tables
for use by perf report. As buildid is read from /sys/kernel/notes in os, then
if you want to list the buildid for guest, please make sure your perf data file
was captured with --guestmount in perf kvm record.
'perf kvm stat <command>' to run a command and gather performance counter
statistics.
Especially, perf 'kvm stat record/report' generates a statistical analysis
of KVM events. Currently, vmexit, mmio (x86 only) and ioport (x86 only)
events are supported. 'perf kvm stat record <command>' records kvm events
and the events between start and end <command>.
And this command produces a file which contains tracing results of kvm
events.
'perf kvm stat report' reports statistical data which includes events
handled time, samples, and so on.
'perf kvm stat live' reports statistical data in a live mode (similar to
record + report but with statistical data updated live at a given display
rate).
OPTIONS
-------
-i::
--input=<path>::
Input file name.
-o::
--output=<path>::
Output file name.
--host::
Collect host side performance profile.
--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
root directories under <path> by a specific filesystem access method,
typically, sshfs. For example, start 2 guest os. The one's pid is 8888
and the other's is 9999.
#mkdir ~/guestmount; cd ~/guestmount
#sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
#sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
#perf kvm --host --guest --guestmount=~/guestmount top
--guestkallsyms=<path>::
Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
kernel symbols. Users copy it out from guest os.
--guestmodules=<path>::
Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
STAT REPORT OPTIONS
-------------------
--vcpu=<value>::
analyze events which occur on this vcpu. (default: all vcpus)
--event=<value>::
event to be analyzed. Possible values: vmexit, mmio (x86 only),
ioport (x86 only). (default: vmexit)
-k::
--key=<value>::
Sorting key. Possible values: sample (default, sort by samples
number), time (sort by average time).
-p::
--pid=::
Analyze events only for given process ID(s) (comma separated list).
STAT LIVE OPTIONS
-----------------
-d::
--display::
Time in seconds between display updates
-m::
--mmap-pages=::
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
-a::
--all-cpus::
System-wide collection from all CPUs.
-p::
--pid=::
Analyze events only for given process ID(s) (comma separated list).
--vcpu=<value>::
analyze events which occur on this vcpu. (default: all vcpus)
--event=<value>::
event to be analyzed. Possible values: vmexit,
mmio (x86 only), ioport (x86 only).
(default: vmexit)
-k::
--key=<value>::
Sorting key. Possible values: sample (default, sort by samples
number), time (sort by average time).
--duration=<value>::
Show events other than HLT (x86 only) or Wait state (s390 only)
that take longer than duration usecs.
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take
a long time, because the file may be huge. A time out is needed
in such cases.
This option sets the time out limit. The default value is 500 ms.
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
linkperf:perf-diff[1], linkperf:perf-buildid-list[1],
linkperf:perf-stat[1]

281
Documentation/perf-list.txt Normal file
View file

@ -0,0 +1,281 @@
perf-list(1)
============
NAME
----
perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
'perf list' [--no-desc] [--long-desc]
[hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
DESCRIPTION
-----------
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.
OPTIONS
-------
--no-desc::
Don't print descriptions.
-v::
--long-desc::
Print longer event descriptions.
--details::
Print how named events are resolved internally into perf events, and also
any extra expressions computed by perf stat.
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
---------------
Events can optionally have a modifier by appending a colon and one or
more modifiers. Modifiers allow the user to restrict the events to be
counted. The following modifiers exist:
u - user-space counting
k - kernel counting
h - hypervisor counting
I - non idle counting
G - guest counting (in KVM guests)
H - host counting (not in KVM guests)
p - precise level
P - use maximum detected precise level
S - read sample value (PERF_SAMPLE_READ)
D - pin the event to the PMU
W - group is weak and will fallback to non-group if not schedulable,
only supported in 'perf stat' for now.
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
0 - SAMPLE_IP can have arbitrary skid
1 - SAMPLE_IP must have constant skid
2 - SAMPLE_IP requested to have 0 skid
3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid
sample shadowing effects.
For Intel systems precise event sampling is implemented with PEBS
which supports up to precise-level 2, and precise level 3 for
some special cases
On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
clocks not halted) and 0xC1 (micro-ops retired). Both events map to
IBS execution sampling (IBS op) with the IBS Op Counter Control bit
(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmers
Manual Volume 2: System Programming, 13.3 Instruction-Based
Sampling). Examples to use IBS:
perf record -a -e cpu-cycles:p ... # use ibs op counting cycles
perf record -a -e r076:p ... # same as -e cpu-cycles:p
perf record -a -e r0C1:p ... # use ibs op counting micro-ops
RAW HARDWARE EVENT DESCRIPTOR
-----------------------------
Even when an event is not available in a symbolic form within perf right now,
it can be encoded in a per processor specific way.
For instance For x86 CPUs NNN represents the raw register encoding with the
layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmers Manual Volume 2: System Programming], Page 344,
Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
Note: Only the following bit fields can be set in x86 counter
registers: event, umask, edge, inv, cmask. Esp. guest/host only and
OS/user mode flags must be setup using <<EVENT_MODIFIERS, EVENT
MODIFIERS>>.
Example:
If the Intel docs for a QM720 Core i7 describe an event as:
Event Umask Event Mask
Num. Value Mnemonic Description Comment
A8H 01H LSD.UOPS Counts the number of micro-ops Use cmask=1 and
delivered by loop stream detector invert to count
cycles
raw encoding of 0x1A8 can be used:
perf stat -e r1a8 -a sleep 1
perf record -e r1a8 ...
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
ARBITRARY PMUS
--------------
perf also supports an extended syntax for specifying raw parameters
to PMUs. Using this typically requires looking up the specific event
in the CPU vendor specific documentation.
The available PMUs and their raw parameters can be listed with
ls /sys/devices/*/format
For example the raw event "LSD.UOPS" core pmu event above could
be specified as
perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
PER SOCKET PMUS
---------------
Some PMUs are not associated with a core, but with a whole CPU socket.
Events on these PMUs generally cannot be sampled, but only counted globally
with perf stat -a. They can be bound to one logical CPU, but will measure
all the CPUs in the same socket.
This example measures memory bandwidth every second
on the first memory controller on socket 0 of a Intel Xeon system
perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
Each memory controller has its own PMU. Measuring the complete system
bandwidth would require specifying all imc PMUs (see perf list output),
and adding the values together. To simplify creation of multiple events,
prefix and glob matching is supported in the PMU name, and the prefix
'uncore_' is also ignored when performing the match. So the command above
can be expanded to all memory controllers by using the syntaxes:
perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...
This example measures the combined core power every second
perf stat -I 1000 -e power/energy-cores/ -a
ACCESS RESTRICTIONS
-------------------
For non root users generally only context switched PMU events are available.
This is normally only the events in the cpu PMU, the predefined events
like cycles and instructions and some software events.
Other PMUs and global measurements are normally root only.
Some event qualifiers, such as "any", are also root only.
This can be overriden by setting the kernel.perf_event_paranoid
sysctl to -1, which allows non root to use these events.
For accessing trace point events perf needs to have read access to
/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
setting.
TRACING
-------
Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
that allows low overhead execution tracing. These are described in a separate
intel-pt.txt document.
PARAMETERIZED EVENTS
--------------------
Some pmu events listed by 'perf-list' will be displayed with '?' in them. For
example:
hv_gpci/dtbp_ptitc,phys_processor_idx=?/
This means that when provided as an event, a value for '?' must
also be supplied. For example:
perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
EVENT GROUPS
------------
Perf supports time based multiplexing of events, when the number of events
active exceeds the number of hardware performance counters. Multiplexing
can cause measurement errors when the workload changes its execution
profile.
When metrics are computed using formulas from event counts, it is useful to
ensure some events are always measured together as a group to minimize multiplexing
errors. Event groups can be specified using { }.
perf stat -e '{instructions,cycles}' ...
The number of available performance counters depend on the CPU. A group
cannot contain more events than available counters.
For example Intel Core CPUs typically have four generic performance counters
for the core, plus three fixed counters for instructions, cycles and
ref-cycles. Some special events have restrictions on which counter they
can schedule, and may not support multiple instances in a single group.
When too many events are specified in the group some of them will not
be measured.
Globally pinned events can limit the number of counters available for
other groups. On x86 systems, the NMI watchdog pins a counter by default.
The nmi watchdog can be disabled as root with
echo 0 > /proc/sys/kernel/nmi_watchdog
Events from multiple different PMUs cannot be mixed in a group, with
some exceptions for software events.
LEADER SAMPLING
---------------
perf also supports group leader sampling using the :S specifier.
perf record -e '{cycles,instructions}:S' ...
perf report --group
Normally all events in a event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.
OPTIONS
-------
Without options all known events will be listed.
To limit the list use:
. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
. 'sw' or 'software' to list software events such as context switches, etc.
. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
. 'tracepoint' to list all tracepoint events, alternatively use
'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
block, etc.
. 'pmu' to print the kernel supplied PMU events.
. 'sdt' to list all Statically Defined Tracepoint events.
. 'metric' to list metrics
. 'metricgroup' to list metricgroups with metrics.
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.
. As a last resort, it will do a substring search in all event names.
One or more types can be used at the same time, listing the events for the
types specified.
Support raw format:
. '--raw-dump', shows the raw-dump of all the events.
. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
a certain kind of events.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmers Manual Volume 2: System Programming]

View file

@ -0,0 +1,70 @@
perf-lock(1)
============
NAME
----
perf-lock - Analyze lock events
SYNOPSIS
--------
[verse]
'perf lock' {record|report|script|info}
DESCRIPTION
-----------
You can analyze various lock behaviours
and statistics with this 'perf lock' command.
'perf lock record <command>' records lock events
between start and end <command>. And this command
produces the file "perf.data" which contains tracing
results of lock events.
'perf lock report' reports statistical data.
'perf lock script' shows raw lock events.
'perf lock info' shows metadata like threads or addresses
of lock instances.
COMMON OPTIONS
--------------
-i::
--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
Be more verbose (show symbol address, etc).
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-f::
--force::
Don't complan, do it.
REPORT OPTIONS
--------------
-k::
--key=<value>::
Sorting key. Possible values: acquired (default), contended,
avg_wait, wait_total, wait_max, wait_min.
INFO OPTIONS
------------
-t::
--threads::
dump thread list in perf.data
-m::
--map::
dump map of lock instances (address:name table)
SEE ALSO
--------
linkperf:perf[1]

View file

@ -0,0 +1,92 @@
perf-mem(1)
===========
NAME
----
perf-mem - Profile memory accesses
SYNOPSIS
--------
[verse]
'perf mem' [<options>] (record [<command>] | report)
DESCRIPTION
-----------
"perf mem record" runs a command and gathers memory operation data
from it, into perf.data. Perf record options are accepted and are passed through.
"perf mem report" displays the result. It invokes perf report with the
right set of options to display a memory access profile. By default, loads
and stores are sampled. Use the -t option to limit to loads or stores.
Note that on Intel systems the memory latency reported is the use-latency,
not the pure load (or store latency). Use latency includes any pipeline
queueing delays in addition to the memory subsystem latency.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-i::
--input=<file>::
Input file name.
-f::
--force::
Don't do ownership validation
-t::
--type=<type>::
Select the memory operation type: load or store (default: load,store)
-D::
--dump-raw-samples::
Dump the raw decoded samples on the screen in a format that is easy to parse with
one sample per line.
-x::
--field-separator=<separator>::
Specify the field separator used when dump raw samples (-D option). By default,
The separator is the space character.
-C::
--cpu=<cpu>::
Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default
is to monitor all CPUS.
-U::
--hide-unresolved::
Only display entries resolved to a symbol.
-p::
--phys-data::
Record/Report sample physical addresses
RECORD OPTIONS
--------------
-e::
--event <event>::
Event selector. Use 'perf mem record -e list' to list available events.
-K::
--all-kernel::
Configure all used events to run in kernel space.
-U::
--all-user::
Configure all used events to run in user space.
-v::
--verbose::
Be more verbose (show counter open errors, etc)
--ldlat <n>::
Specify desired latency for loads event.
In addition, for report all perf report options are valid, and for record
all perf record options.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]

View file

@ -0,0 +1,299 @@
perf-probe(1)
=============
NAME
----
perf-probe - Define new dynamic tracepoints
SYNOPSIS
--------
[verse]
'perf probe' [options] --add='PROBE' [...]
or
'perf probe' [options] PROBE
or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list[=[GROUP:]EVENT]
or
'perf probe' [options] --line='LINE'
or
'perf probe' [options] --vars='PROBEPOINT'
or
'perf probe' [options] --funcs
or
'perf probe' [options] --definition='PROBE' [...]
DESCRIPTION
-----------
This command defines dynamic tracepoint events, by symbol and registers
without debuginfo, or by C expressions (C line numbers, C function names,
and C local variables) with debuginfo.
OPTIONS
-------
-k::
--vmlinux=PATH::
Specify vmlinux path which has debuginfo (Dwarf binary).
Only when using this with --definition, you can give an offline
vmlinux file.
-m::
--module=MODNAME|PATH::
Specify module name in which perf-probe searches probe points
or lines. If a path of module file is passed, perf-probe
treat it as an offline module (this means you can add a probe on
a module which has not been loaded yet).
-s::
--source=PATH::
Specify path to kernel source.
-v::
--verbose::
Be more verbose (show parsed arguments, etc).
Can not use with -q.
-q::
--quiet::
Be quiet (do not show any messages including errors).
Can not use with -v.
-a::
--add=::
Define a probe event (see PROBE SYNTAX for detail).
-d::
--del=::
Delete probe events. This accepts glob wildcards('*', '?') and character
classes(e.g. [a-z], [!A-Z]).
-l::
--list[=[GROUP:]EVENT]::
List up current probe events. This can also accept filtering patterns of
event names.
When this is used with --cache, perf shows all cached probes instead of
the live probes.
-L::
--line=::
Show source code lines which can be probed. This needs an argument
which specifies a range of the source code. (see LINE SYNTAX for detail)
-V::
--vars=::
Show available local variables at given probe point. The argument
syntax is same as PROBE SYNTAX, but NO ARGs.
--externs::
(Only for --vars) Show external defined variables in addition to local
variables.
--no-inlines::
(Only for --add) Search only for non-inlined functions. The functions
which do not have instances are ignored.
-F::
--funcs[=FILTER]::
Show available functions in given module or kernel. With -x/--exec,
can also list functions in a user space executable / shared library.
This also can accept a FILTER rule argument.
-D::
--definition=::
Show trace-event definition converted from given probe-event instead
of write it into tracing/[k,u]probe_events.
--filter=FILTER::
(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
pattern, see FILTER PATTERN for detail.
Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
for --funcs.
If several filters are specified, only the last filter is used.
-f::
--force::
Forcibly add events with existing name.
-n::
--dry-run::
Dry run. With this option, --add and --del doesn't execute actual
adding and removal operations.
--cache::
(With --add) Cache the probes. Any events which successfully added
are also stored in the cache file.
(With --list) Show cached probes.
(With --del) Remove cached probes.
--max-probes=NUM::
Set the maximum number of probe points for an event. Default is 128.
--target-ns=PID:
Obtain mount namespace information from the target pid. This is
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
-x::
--exec=PATH::
Specify path to the executable or shared library file for user
space tracing. Can also be used with --funcs option.
--demangle::
Demangle application symbols. --no-demangle is also available
for disabling demangling.
--demangle-kernel::
Demangle kernel symbols. --no-demangle-kernel is also available
for disabling kernel demangling.
In absence of -m/-x options, perf probe checks if the first argument after
the options is an absolute path name. If its an absolute path, perf probe
uses it as a target module/target user space binary to probe.
PROBE SYNTAX
------------
Probe points are defined by following syntax.
1) Define event based on function name
[[GROUP:]EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
2) Define event based on source file with line number
[[GROUP:]EVENT=]SRC:ALN [ARG ...]
3) Define event based on source file with lazy pattern
[[GROUP:]EVENT=]SRC;PTN [ARG ...]
4) Pre-defined SDT events or cached event with name
%[sdt_PROVIDER:]SDTEVENT
or,
sdt_PROVIDER:SDTEVENT
'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_<bin>' is used for uprobe.
Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the
modules.
'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function.
It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
'SDTEVENT' and 'PROVIDER' is the pre-defined event name which is defined by user SDT (Statically Defined Tracing) or the pre-cached probes with event name.
Note that before using the SDT event, the target binary (on which SDT events are defined) must be scanned by linkperf:perf-buildid-cache[1] to make SDT events as cached events.
For details of the SDT, see below.
https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html
ESCAPED CHARACTER
-----------------
In the probe syntax, '=', '@', '+', ':' and ';' are treated as a special character. You can use a backslash ('\') to escape the special characters.
This is useful if you need to probe on a specific versioned symbols, like @GLIBC_... suffixes, or also you need to specify a source file which includes the special characters.
Note that usually single backslash is consumed by shell, so you might need to pass double backslash (\\) or wrapping with single quotes (\'AAA\@BBB').
See EXAMPLES how it is used.
PROBE ARGUMENT
--------------
Each probe argument follows below syntax.
[NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
TYPES
-----
Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected).
String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
b<bit-width>@<bit-offset>/<container-size>
LINE SYNTAX
-----------
Line range is described by following syntax.
"FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
FUNC specifies the function name of showing lines. 'RLN' is the start line
number from function entry line, and 'RLN2' is the end line number. As same as
probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
and 'ALN2' is end line number in the file. It is also possible to specify how
many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
for searching a specific function when several functions share same name.
So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
LAZY MATCHING
-------------
The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
e.g.
'a=*' can matches 'a=b', 'a = b', 'a == b' and so on.
This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
FILTER PATTERN
--------------
The filter pattern is a glob matching pattern(s) to filter variables.
In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
e.g.
With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
EXAMPLES
--------
Display which lines in schedule() can be probed:
./perf probe --line schedule
Add a probe on schedule() function 12th line with recording cpu local variable:
./perf probe schedule:12 cpu
or
./perf probe --add='schedule:12 cpu'
Add one or more probes which has the name start with "schedule".
./perf probe schedule*
or
./perf probe --add='schedule*'
Add probes on lines in schedule() function which calls update_rq_clock().
./perf probe 'schedule;update_rq_clock*'
or
./perf probe --add='schedule;update_rq_clock*'
Delete all probes on schedule().
./perf probe --del='schedule*'
Add probes at zfree() function on /bin/zsh
./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree
Add probes at malloc() function on libc
./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
Add a uprobe to a target process running in a different mount namespace
./perf probe --target-ns <target pid> -x /lib64/libc.so.6 malloc
Add a USDT probe to a target process running in a different mount namespace
./perf probe --target-ns <target pid> -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end
Add a probe on specific versioned symbol by backslash escape
./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5'
Add a probe in a source file using special characters by backslash escape
./perf probe -x /opt/test/a.out 'foo\+bar.c:4'
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1]

View file

@ -0,0 +1,502 @@
perf-record(1)
==============
NAME
----
perf-record - Run a command and record its profile into perf.data
SYNOPSIS
--------
[verse]
'perf record' [-e <EVENT> | --event=EVENT] [-a] <command>
'perf record' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
DESCRIPTION
-----------
This command runs a command and gathers a performance counter profile
from it, into perf.data - without displaying anything.
This file can then be inspected later on, using 'perf report'.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-e::
--event=::
Select the PMU event. Selection can be:
- a symbolic event name (use 'perf list' to list all events)
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*.
- a symbolically formed event like 'pmu/config=M,config1=N,config3=K/'
where M, N, K are numbers (in decimal, hex, octal format). Acceptable
values for each of 'config', 'config1' and 'config2' are defined by
corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
param1 and param2 are defined as formats for the PMU in:
/sys/bus/event_source/devices/<pmu>/format/*
There are also some parameters which are not defined in .../<pmu>/format/*.
These params can be used to overload default config values per event.
Here are some common parameters:
- 'period': Set event sampling period
- 'freq': Set event sampling frequency
- 'time': Disable/enable time stamping. Acceptable values are 1 for
enabling time stamping. 0 for disabling time stamping.
The default is 1.
- 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
"no" for disable callgraph.
- 'stack-size': user stack size for dwarf mode
See the linkperf:perf-list[1] man page for more parameters.
Note: If user explicitly sets options which conflict with the params,
the value set by the parameters will be overridden.
Also not defined in .../<pmu>/format/* are PMU driver specific
configuration parameters. Any configuration parameter preceded by
the letter '@' is not interpreted in user space and sent down directly
to the PMU driver. For example:
perf record -e some_event/@cfg1,@cfg2=config/ ...
will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated
with the event for further processing. There is no restriction on
what the configuration parameters are, as long as their semantic is
understood and supported by the PMU driver.
- a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
where addr is the address in memory you want to break in.
Access is the memory access type (read, write, execute) it can
be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range,
number of bytes from specified addr, which the breakpoint will cover.
If you want to profile read-write accesses in 0x1000, just set
'mem:0x1000:rw'.
If you want to profile write accesses in [0x1000~1008), just set
'mem:0x1000/8:w'.
- a group of events surrounded by a pair of brace ("{event1,event2,...}").
Each event is separated by commas and the group should be quoted to
prevent the shell interpretation. You also need to use --group on
"perf report" to view group events together.
--filter=<filter>::
Event filter. This option should follow a event selector (-e) which
selects either tracepoint event(s) or a hardware trace PMU
(e.g. Intel PT or CoreSight).
- tracepoint filters
In the case of tracepoints, multiple '--filter' options are combined
using '&&'.
- address filters
A hardware trace PMU advertises its ability to accept a number of
address filters by specifying a non-zero value in
/sys/bus/event_source/devices/<pmu>/nr_addr_filters.
Address filters have the format:
filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
Where:
- 'filter': defines a region that will be traced.
- 'start': defines an address at which tracing will begin.
- 'stop': defines an address at which tracing will stop.
- 'tracestop': defines a region in which tracing will stop.
<file name> is the name of the object file, <start> is the offset to the
code to trace in that file, and <size> is the size of the region to
trace. 'start' and 'stop' filters need not specify a <size>.
If no object file is specified then the kernel is assumed, in which case
the start address must be a current kernel memory address.
<start> can also be specified by providing the name of a symbol. If the
symbol name is not unique, it can be disambiguated by inserting #n where
'n' selects the n'th symbol in address order. Alternately #0, #g or #G
select only a global symbol. <size> can also be specified by providing
the name of a symbol, in which case the size is calculated to the end
of that symbol. For 'filter' and 'tracestop' filters, if <size> is
omitted and <start> is a symbol, then the size is calculated to the end
of that symbol.
If <size> is omitted and <start> is '*', then the start and size will
be calculated from the first and last symbols, i.e. to trace the whole
file.
If symbol names (or '*') are provided, they must be surrounded by white
space.
The filter passed to the kernel is not necessarily the same as entered.
To see the filter that is passed, use the -v option.
The kernel may not be able to configure a trace region if it is not
within a single mapping. MMAP events (or /proc/<pid>/maps) can be
examined to determine if that is a possibility.
Multiple filters can be separated with space or comma.
--exclude-perf::
Don't record events issued by perf itself. This option should follow
a event selector (-e) which selects tracepoint event(s). It adds a
filter expression 'common_pid != $PERFPID' to filters. If other
'--filter' exists, the new filter expression will be combined with
them by '&&'.
-a::
--all-cpus::
System-wide collection from all CPUs (default if no target is specified).
-p::
--pid=::
Record events on existing process ID (comma separated list).
-t::
--tid=::
Record events on existing thread ID (comma separated list).
This option also disables inheritance by default. Enable it by adding
--inherit.
-u::
--uid=::
Record events in threads owned by uid. Name or number.
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.
--no-buffering::
Collect data without buffering.
-c::
--count=::
Event period to sample.
-o::
--output=::
Output file name.
-i::
--no-inherit::
Child tasks do not inherit counters.
-F::
--freq=::
Profile at this frequency. Use 'max' to use the currently maximum
allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
sysctl. Will throttle down to the currently maximum allowed frequency.
See --strict-freq.
--strict-freq::
Fail if the specified frequency can't be used.
-m::
--mmap-pages=::
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
Also, by adding a comma, the number of mmap pages for AUX
area tracing can be specified.
--group::
Put all events in a single event group. This precedes the --event
option and remains only for backward compatibility. See --event.
-g::
Enables call-graph (stack chain/backtrace) recording.
--call-graph::
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g. Default is "fp".
Allows specifying "fp" (frame pointer) or "dwarf"
(DWARF's CFI - Call Frame Information) or "lbr"
(Hardware Last Branch Record facility) as the method to collect
the information used to show the call graphs.
In some systems, where binaries are build with gcc
--fomit-frame-pointer, using the "fp" method will produce bogus
call graphs, using "dwarf", if available (perf tools linked to
the libunwind or libdw library) should be used instead.
Using the "lbr" method doesn't require any compiler options. It
will produce call graphs from the hardware LBR registers. The
main limitation is that it is only available on new Intel
platforms, such as Haswell. It can only get user call chain. It
doesn't work with branch stack sampling at the same time.
When "dwarf" recording is used, perf also records (user) stack dump
when sampled. Default size of the stack dump is 8192 (bytes).
User can change the size by passing the size after comma like
"--call-graph dwarf,4096".
-q::
--quiet::
Don't print any message, useful for scripting.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
-s::
--stat::
Record per-thread event counts. Use it with 'perf report -T' to see
the values.
-d::
--data::
Record the sample virtual addresses.
--phys-data::
Record the sample physical addresses.
-T::
--timestamp::
Record the sample timestamps. Use it with 'perf report -D' to see the
timestamps, for instance.
-P::
--period::
Record the sample period.
--sample-cpu::
Record the sample cpu.
-n::
--no-samples::
Don't sample.
-R::
--raw-samples::
Collect raw sample records from all opened counters (default for tracepoint counters).
-C::
--cpu::
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode with inheritance mode on (default), samples are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
-B::
--no-buildid::
Do not save the build ids of binaries in the perf.data files. This skips
post processing after recording, which sometimes makes the final step in
the recording process to take a long time, as it needs to process all
events looking for mmap records. The downside is that it can misresolve
symbols if the workload binaries used when recording get locally rebuilt
or upgraded, because the only key available in this case is the
pathname. You can also set the "record.build-id" config variable to
'skip to have this behaviour permanently.
-N::
--no-buildid-cache::
Do not update the buildid cache. This saves some overhead in situations
where the information in the perf.data file (which includes buildids)
is sufficient. You can also set the "record.build-id" config variable to
'no-cache' to have the same effect.
-G name,...::
--cgroup name,...::
monitor only in the container (cgroup) called "name". This option is available only
in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
to first event, second cgroup to second event and so on. It is possible to provide
an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
corresponding events, i.e., they always refer to events defined earlier on the command
line. If the user wants to track multiple events for a specific cgroup, the user can
use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters. The
following filters are defined:
- any: any type of branches
- any_call: any function call or system call
- any_ret: any function return or system call return
- ind_call: any indirect branch
- call: direct calls, including far (to/from kernel) calls
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
- in_tx: only when the target is in a hardware transaction
- no_tx: only when the target is not in a hardware transaction
- abort_tx: only when the target is a hardware transaction abort
- cond: conditional branches
- save_type: save branch type during sampling in case binary is not available later
+
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
--weight::
Enable weightened sampling. An additional weight is recorded per sample and can be
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
Record events of type PERF_RECORD_NAMESPACES.
--transaction::
Record transaction flags for transaction related events.
--per-thread::
Use per-thread mmaps. By default per-cpu mmaps are created. This option
overrides that and uses per-thread mmaps. A side-effect of that is that
inheritance is automatically disabled. --per-thread is ignored with a warning
if combined with -a or -C options.
-D::
--delay=::
After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different.
-I::
--intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. List of captured registers depends on the architecture. This option
is off by default. It is possible to select the registers to sample using their
symbolic names, e.g. on x86, ax, si. To list the available registers use
--intr-regs=\?. To name registers, pass a comma separated list such as
--intr-regs=ax,bx. The list of register is architecture dependent.
--user-regs::
Capture user registers at sample time. Same arguments as -I.
--running-time::
Record running and enabled time for read events (:S)
-k::
--clockid::
Sets the clock id to use for the various time fields in the perf_event_type
records. See clock_gettime(). In particular CLOCK_MONOTONIC and
CLOCK_MONOTONIC_RAW are supported, some events might also allow
CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
-S::
--snapshot::
Select AUX area tracing Snapshot Mode. This option is valid only with an
AUX area tracing event. Optionally the number of bytes to capture per
snapshot can be specified. In Snapshot Mode, trace data is captured only when
signal SIGUSR2 is received.
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
This option sets the time out limit. The default value is 500 ms.
--switch-events::
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
(enabled when BPF support is on)
--clang-opt=OPTIONS::
Options passed to clang when compiling BPF scriptlets.
(enabled when BPF support is on)
--vmlinux=PATH::
Specify vmlinux path which has debuginfo.
(enabled when BPF prologue is on)
--buildid-all::
Record build-id of all DSOs regardless whether it's actually hit or not.
--all-kernel::
Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
--timestamp-filename
Append timestamp to output file name.
--timestamp-boundary::
Record timestamp boundary (time of first/last samples).
--switch-output[=mode]::
Generate multiple perf.data files, timestamp prefixed, switching to a new one
based on 'mode' value:
"signal" - when receiving a SIGUSR2 (default value) or
<size> - when reaching the size threshold, size is expected to
be a number with appended unit character - B/K/M/G
<time> - when reaching the time threshold, size is expected to
be a number with appended unit character - s/m/h/d
Note: the precision of the size threshold hugely depends
on your configuration - the number and size of your ring
buffers (-m). It is generally more precise for higher sizes
(like >5M), for lower values expect different sizes.
A possible use case is to, given an external event, slice the perf.data file
that gets then processed, possibly via a perf script, to decide if that
particular perf.data snapshot should be kept or not.
Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
The reason for the latter two is to reduce the data file switching
overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
--dry-run::
Parse options then exit. --dry-run can be used to detect errors in cmdline
options.
'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
in config file is set to true.
--tail-synthesize::
Instead of collecting non-sample events (for example, fork, comm, mmap) at
the beginning of record, collect them during finalizing an output file.
The collected non-sample events reflects the status of the system when
record is finished.
--overwrite::
Makes all events use an overwritable ring buffer. An overwritable ring
buffer works like a flight recorder: when it gets full, the kernel will
overwrite the oldest records, that thus will never make it to the
perf.data file.
When '--overwrite' and '--switch-output' are used perf records and drops
events until it receives a signal, meaning that something unusual was
detected that warrants taking a snapshot of the most current events,
those fitting in the ring buffer at that moment.
'overwrite' attribute can also be set or canceled for an event using
config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
Implies --tail-synthesize.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]

View file

@ -0,0 +1,484 @@
perf-report(1)
==============
NAME
----
perf-report - Read perf.data (created by perf record) and display the profile
SYNOPSIS
--------
[verse]
'perf report' [-i <file> | --input=file]
DESCRIPTION
-----------
This command displays the performance counter profile information recorded
via perf record.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
Be more verbose. (show symbol address, etc)
-q::
--quiet::
Do not show any message. (Suppress -v)
-n::
--show-nr-samples::
Show the number of samples for each symbol
--show-cpu-utilization::
Show sample percentage for different cpu modes.
-T::
--threads::
Show per-thread event counters. The input data file should be recorded
with -s option.
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
--pid=::
Only show events for given process ID (comma separated list).
--tid=::
Only show events for given thread ID (comma separated list).
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
-U::
--hide-unresolved::
Only display entries resolved to a symbol.
-s::
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
local_weight, cgroup_id.
Each key has following meaning:
- comm: command (name) of the task which can be read via /proc/<pid>/comm
- pid: command and tid of the task
- dso: name of library or module executed at the time of sample
- dso_size: size of library or module executed at the time of sample
- symbol: name of function executed at the time of sample
- symbol_size: size of function executed at the time of sample
- parent: name of function matched to the parent regex filter. Unmatched
entries are displayed as "[other]".
- cpu: cpu number the task ran at the time of sample
- socket: processor socket number the task ran at the time of sample
- srcline: filename and line number executed at the time of sample. The
DWARF debugging info must be provided.
- srcfile: file name of the source file of the same. Requires dwarf
information.
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
- overhead_us: Overhead percentage of sample running in user mode
- overhead_guest_sys: Overhead percentage of sample running in system mode
on guest machine
- overhead_guest_us: Overhead percentage of sample running in user mode on
guest machine
- sample: Number of sample
- period: Raw number of event count of sample
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
If --branch-stack option is used, following sort keys are also
available:
- dso_from: name of library or module branched from
- dso_to: name of library or module branched to
- symbol_from: name of function branched from
- symbol_to: name of function branched to
- srcline_from: source file and line branched from
- srcline_to: source file and line branched to
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
- cycles: Cycles in basic block
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
If the --mem-mode option is used, the following sort keys are also available
(incompatible with --branch-stack):
symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
- symbol_daddr: name of data symbol being executed on at the time of sample
- dso_daddr: name of library or module containing the data being executed
on at the time of the sample
- locked: whether the bus was locked at the time of the sample
- tlb: type of tlb access for the data at the time of the sample
- mem: type of memory access for the data at the time of the sample
- snoop: type of snoop (if any) for the data at the time of the sample
- dcacheline: the cacheline the data address is on at the time of the sample
- phys_daddr: physical address of data being executed on at the time of sample
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
If the data file has tracepoint event(s), following (dynamic) sort keys
are also available:
trace, trace_fields, [<event>.]<field>[/raw]
- trace: pretty printed trace output in a single column
- trace_fields: fields in tracepoints in separate columns
- <field name>: optional event and field name for a specific field
The last form consists of event and field names. If event name is
omitted, it searches all events for matching field name. The matched
field will be shown only for the event has the field. The event name
supports substring match so user doesn't need to specify full subsystem
and event name everytime. For example, 'sched:sched_switch' event can
be shortened to 'switch' as long as it's not ambiguous. Also event can
be specified by its index (starting from 1) preceded by the '%'.
So '%1' is the first event, '%2' is the second, and so on.
The field name can have '/raw' suffix which disables pretty printing
and shows raw field value like hex numbers. The --raw-trace option
has the same effect for all dynamic sort keys.
The default sort keys are changed to 'trace' if all events in the data
file are tracepoint.
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in -F will be appended
automatically.
If the keys starts with a prefix '+', then it will append the specified
field(s) to the default field order. For example: perf report -F +period,sample.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
function and searched through the callchain, thus it requires callchain
information recorded. The pattern is in the extended regex format and
defaults to "\^sys_|^do_page_fault", see '--sort parent'.
-x::
--exclude-other::
Only display entries with parent-match.
-w::
--column-widths=<width[,width...]>::
Force each column width to the provided list, for large terminal
readability. 0 means no limit (default behavior).
-t::
--field-separator=::
Use a special separator character and don't pad with spaces, replacing
all occurrences of this separator in symbol names (and other output)
with a '.' character, that thus it's the only non valid separator.
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
-g::
--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
call order, sort key, optional branch and value. Note that ordering
is not fixed so any parameter can be given in an arbitrary order.
One exception is the print_limit which should be preceded by threshold.
print_type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates. (default)
- fractal: like graph, but displays relative rates. Each branch of
the tree is considered as a new profiled object.
- folded: call chains are displayed in a line, separated by semicolons
- none: disable call chain display.
threshold is a percentage value which specifies a minimum percent to be
included in the output call graph. Default is 0.5 (%).
print_limit is only applied when stdio interface is used. It's to limit
number of call graph entries in a single hist entry. Note that it needs
to be given after threshold (but not necessarily consecutive).
Default is 0 (unlimited).
order can be either:
- callee: callee based call graph.
- caller: inverted caller based call graph.
Default is 'caller' when --children is used, otherwise 'callee'.
sort_key can be:
- function: compare on functions (default)
- address: compare on individual code addresses
- srcline: compare on source filename and line number
branch can be:
- branch: include last branch information in callgraph when available.
Usually more convenient to use --branch-history for this.
value can be:
- percent: diplay overhead percent (default)
- period: display event period
- count: display event count
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.
See the `overhead calculation' section for more details. Enabled by
default, disable with --no-children.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
between information loss and faster processing especially for
workloads that can have a very long callchain stack.
Note that when using the --itrace option the synthesized callchain size
will override this value if the synthesized callchain size is bigger.
Default: 127
-G::
--inverted::
alias for inverted caller based call graph.
--ignore-callees=<regex>::
Ignore callees of the function(s) matching the given regex.
This has the effect of collecting the callers of each such
function into one place in the call-graph tree.
--pretty=<key>::
Pretty printing style. key: normal, raw
--stdio:: Use the stdio interface.
--stdio-color::
'always', 'never' or 'auto', allowing configuring color output
via the command line, in addition to via "color.ui" .perfconfig.
Use '--stdio-color always' to generate color even when redirecting
to a pipe or file. Using just '--stdio-color' is equivalent to
using 'always'.
--tui:: Use the TUI interface, that is integrated with annotate and allows
zooming into DSOs or threads, among other features. Use of --tui
requires a tty, if one is not present, as when piping to other
commands, the stdio interface is used.
--gtk:: Use the GTK2 interface.
-k::
--vmlinux=<file>::
vmlinux pathname
--ignore-vmlinux::
Ignore vmlinux files.
--kallsyms=<file>::
kallsyms pathname
-m::
--modules::
Load module symbols. WARNING: This should only be used with -k and
a LIVE kernel.
-f::
--force::
Don't do ownership validation.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
-M::
--disassembler-style=:: Set disassembler style for objdump.
--source::
Interleave source code with assembly code. Enabled by default,
disable with --no-source.
--asm-raw::
Show raw instruction encoding of assembly instructions.
--show-total-period:: Show a column with the sum of periods.
-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
-b::
--branch-stack::
Use the addresses of sampled taken branches instead of the instruction
address to build the histograms. To generate meaningful output, the
perf.data file must have been obtained using perf record -b or
perf record --branch-filter xxx where xxx is a branch filter option.
perf report is able to auto-detect whether a perf.data file contains
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.
--branch-history::
Add the addresses of sampled taken branches to the callstack.
This allows to examine the path the program took to each sample.
The data collection must have used -b (or -j) and -g.
--objdump=<path>::
Path to objdump binary.
--group::
Show event group information together. It forces group output also
if there are no groups defined in data file.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
--demangle-kernel::
Demangle kernel symbol names to human readable form (for C++ kernels).
--mem-mode::
Use the data addresses of samples in addition to instruction addresses
to build the histograms. To generate meaningful output, the perf.data
file must have been obtained using perf record -d -W and using a
special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
'perf mem' for simpler access.
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0). Note that this option also sets the percent limit (threshold)
of callchains. However the default value of callchain threshold is
different than the default value of hist entries. Please see the
--call-graph option for details.
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
--header::
Show header information in the perf.data file. This includes
various information like hostname, OS and perf version, cpu/mem
info, perf command line, event list and so on. Currently only
--stdio output supports this feature.
--header-only::
Show only perf.data header (forces --stdio).
--time::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
Also support time percent with multiple time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example:
Select the second 10% time slice:
perf report --time 10%/2
Select from 0% to 10% time slice:
perf report --time 0%-10%
Select the first and second 10% time slices:
perf report --time 10%/1,10%/2
Select from 0% to 10% and 30% to 40% slices:
perf report --time 0%-10%,30%-40%
--itrace::
Options for decoding instruction tracing data. The options are:
include::itrace.txt[]
To disable decoding entirely, use --no-itrace.
--full-source-path::
Show the full path for source files for srcline output.
--show-ref-call-graph::
When multiple events are sampled, it may not be needed to collect
callgraphs for all of them. The sample sites are usually nearby,
and it's enough to collect the callgraphs on a reference event.
So user can use "call-graph=no" event modifier to disable callgraph
for other events to reduce the overhead.
However, perf report cannot show callgraphs for the event which
disable the callgraph.
This option extends the perf report to show reference callgraphs,
which collected by reference event, in no callgraph event.
--socket-filter::
Only report the samples on the processor socket that match with this filter
--raw-trace::
When displaying traceevent output, do not use print fmt or plugins.
--hierarchy::
Enable hierarchical output.
--inline::
If a callgraph address belongs to an inlined function, the inline stack
will be printed. Each entry is function name or file/line. Enabled by
default, disable with --no-inline.
--mmaps::
Show --tasks output plus mmap information in a format similar to
/proc/<PID>/maps.
Please note that not all mmaps are stored, options affecting which ones
are include 'perf record --data', for instance.
--stats::
Display overall events statistics without any further processing.
(like the one at the end of the perf report -D command)
--tasks::
Display monitored tasks stored in perf data. Displaying pid/tid/ppid
plus the command string aligned to distinguish parent and child tasks.
include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]

View file

@ -0,0 +1,167 @@
perf-sched(1)
=============
NAME
----
perf-sched - Tool to trace/measure scheduler properties (latencies)
SYNOPSIS
--------
[verse]
'perf sched' {record|latency|map|replay|script|timehist}
DESCRIPTION
-----------
There are several variants of 'perf sched':
'perf sched record <command>' to record the scheduling events
of an arbitrary workload.
'perf sched latency' to report the per task scheduling latencies
and other scheduling properties of the workload.
'perf sched script' to see a detailed trace of the workload that
was recorded (aliased to 'perf script' for now).
'perf sched replay' to simulate the workload that was recorded
via perf sched record. (this is done by starting up mockup threads
that mimic the workload based on the events in the trace. These
threads can then replay the timings (CPU runtime and sleep patterns)
of the workload as it occurred when it was recorded - and can repeat
it a number of times, measuring its performance.)
'perf sched map' to print a textual context-switching outline of
workload captured via perf sched record. Columns stand for
individual CPUs, and the two-letter shortcuts stand for tasks that
are running on a CPU. A '*' denotes the CPU that had the event, and
a dot signals an idle CPU.
'perf sched timehist' provides an analysis of scheduling events.
Example usage:
perf sched record -- sleep 1
perf sched timehist
By default it shows the individual schedule events, including the wait
time (time between sched-out and next sched-in events for the task), the
task scheduling delay (time between wakeup and actually running) and run
time for the task:
time cpu task name wait time sch delay run time
[tid/pid] (msec) (msec) (msec)
-------------- ------ -------------------- --------- --------- ---------
79371.874569 [0011] gcc[31949] 0.014 0.000 1.148
79371.874591 [0010] gcc[31951] 0.000 0.000 0.024
79371.874603 [0010] migration/10[59] 3.350 0.004 0.011
79371.874604 [0011] <idle> 1.148 0.000 0.035
79371.874723 [0005] <idle> 0.016 0.000 1.383
79371.874746 [0005] gcc[31949] 0.153 0.078 0.022
...
Times are in msec.usec.
OPTIONS
-------
-i::
--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
Be more verbose. (show symbol address, etc)
-D::
--dump-raw-trace=::
Display verbose dump of the sched data.
-f::
--force::
Don't complain, do it.
OPTIONS for 'perf sched map'
----------------------------
--compact::
Show only CPUs with activity. Helps visualizing on high core
count systems.
--cpus::
Show just entries with activities for the given CPUs.
--color-cpus::
Highlight the given cpus.
--color-pids::
Highlight the given pids.
OPTIONS for 'perf sched timehist'
---------------------------------
-k::
--vmlinux=<file>::
vmlinux pathname
--kallsyms=<file>::
kallsyms pathname
-g::
--call-graph::
Display call chains if present (default on).
--max-stack::
Maximum number of functions to display in backtrace, default 5.
-p=::
--pid=::
Only show events for given process ID (comma separated list).
-t=::
--tid=::
Only show events for given thread ID (comma separated list).
-s::
--summary::
Show only a summary of scheduling by thread with min, max, and average
run times (in sec) and relative stddev.
-S::
--with-summary::
Show all scheduling events followed by a summary by thread with min,
max, and average run times (in sec) and relative stddev.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-V::
--cpu-visual::
Show visual aid for sched switches by CPU: 'i' marks idle time,
's' are scheduler events.
-w::
--wakeups::
Show wakeup events.
-M::
--migrations::
Show migration events.
-n::
--next::
Show next task.
-I::
--idle-hist::
Show idle-related events only.
--time::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
--state::
Show task state when it switched out.
SEE ALSO
--------
linkperf:perf-record[1]

View file

@ -0,0 +1,216 @@
perf-script-perl(1)
===================
NAME
----
perf-script-perl - Process trace data with a Perl script
SYNOPSIS
--------
[verse]
'perf script' [-s [Perl]:script[.pl] ]
DESCRIPTION
-----------
This perf script option is used to process perf script data using perf's
built-in Perl interpreter. It reads and processes the input file and
displays the results of the trace analysis implemented in the given
Perl script, if any.
STARTER SCRIPTS
---------------
You can avoid reading the rest of this document by running 'perf script
-g perl' in the same directory as an existing perf.data trace file.
That will generate a starter script containing a handler for each of
the event types in the trace file; it simply prints every available
field for each event in the trace file.
You can also look at the existing scripts in
~/libexec/perf-core/scripts/perl for typical examples showing how to
do basic things like aggregate event data, print results, etc. Also,
the check-perf-script.pl script, while not interesting for its results,
attempts to exercise all of the main scripting features.
EVENT HANDLERS
--------------
When perf script is invoked using a trace script, a user-defined
'handler function' is called for each event in the trace. If there's
no handler function defined for a given event type, the event is
ignored (or passed to a 'trace_unhandled' function, see below) and the
next event is processed.
Most of the event's field values are passed as arguments to the
handler function; some of the less common ones aren't - those are
available as calls back into the perf executable (see below).
As an example, the following perf record command can be used to record
all sched_wakeup events in the system:
# perf record -a -e sched:sched_wakeup
Traces meant to be processed using a script should be recorded with
the above option: -a to enable system-wide collection.
The format file for the sched_wakep event defines the following fields
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
----
format:
field:unsigned short common_type;
field:unsigned char common_flags;
field:unsigned char common_preempt_count;
field:int common_pid;
field:char comm[TASK_COMM_LEN];
field:pid_t pid;
field:int prio;
field:int success;
field:int target_cpu;
----
The handler function for this event would be defined as:
----
sub sched::sched_wakeup
{
my ($event_name, $context, $common_cpu, $common_secs,
$common_nsecs, $common_pid, $common_comm,
$comm, $pid, $prio, $success, $target_cpu) = @_;
}
----
The handler function takes the form subsystem::event_name.
The $common_* arguments in the handler's argument list are the set of
arguments passed to all event handlers; some of the fields correspond
to the common_* fields in the format file, but some are synthesized,
and some of the common_* fields aren't common enough to to be passed
to every event as arguments but are available as library functions.
Here's a brief description of each of the invariant event args:
$event_name the name of the event as text
$context an opaque 'cookie' used in calls back into perf
$common_cpu the cpu the event occurred on
$common_secs the secs portion of the event timestamp
$common_nsecs the nsecs portion of the event timestamp
$common_pid the pid of the current task
$common_comm the name of the current process
All of the remaining fields in the event's format file have
counterparts as handler function arguments of the same name, as can be
seen in the example above.
The above provides the basics needed to directly access every field of
every event in a trace, which covers 90% of what you need to know to
write a useful trace script. The sections below cover the rest.
SCRIPT LAYOUT
-------------
Every perf script Perl script should start by setting up a Perl module
search path and 'use'ing a few support modules (see module
descriptions below):
----
use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
use lib "./Perf-Trace-Util/lib";
use Perf::Trace::Core;
use Perf::Trace::Context;
use Perf::Trace::Util;
----
The rest of the script can contain handler functions and support
functions in any order.
Aside from the event handler functions discussed above, every script
can implement a set of optional functions:
*trace_begin*, if defined, is called before any event is processed and
gives scripts a chance to do setup tasks:
----
sub trace_begin
{
}
----
*trace_end*, if defined, is called after all events have been
processed and gives scripts a chance to do end-of-script tasks, such
as display results:
----
sub trace_end
{
}
----
*trace_unhandled*, if defined, is called after for any event that
doesn't have a handler explicitly defined for it. The standard set
of common arguments are passed into it:
----
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs,
$common_nsecs, $common_pid, $common_comm) = @_;
}
----
The remaining sections provide descriptions of each of the available
built-in perf script Perl modules and their associated functions.
AVAILABLE MODULES AND FUNCTIONS
-------------------------------
The following sections describe the functions and variables available
via the various Perf::Trace::* Perl modules. To use the functions and
variables from the given module, add the corresponding 'use
Perf::Trace::XXX' line to your perf script script.
Perf::Trace::Core Module
~~~~~~~~~~~~~~~~~~~~~~~~
These functions provide some essential functions to user scripts.
The *flag_str* and *symbol_str* functions provide human-readable
strings for flag and symbolic fields. These correspond to the strings
and values parsed from the 'print fmt' fields of the event format
files:
flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
Perf::Trace::Context Module
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some of the 'common' fields in the event format file aren't all that
common, but need to be made accessible to user scripts nonetheless.
Perf::Trace::Context defines a set of functions that can be used to
access this data in the context of the current event. Each of these
functions expects a $context variable, which is the same as the
$context variable passed into every event handler as the second
argument.
common_pc($context) - returns common_preempt count for the current event
common_flags($context) - returns common_flags for the current event
common_lock_depth($context) - returns common_lock_depth for the current event
Perf::Trace::Util Module
~~~~~~~~~~~~~~~~~~~~~~~~
Various utility functions for use with perf script:
nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
nsecs_secs($nsecs) - returns whole secs portion given nsecs
nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
nsecs_str($nsecs) - returns printable string in the form secs.nsecs
avg($total, $n) - returns average given a sum and a total number of values
SEE ALSO
--------
linkperf:perf-script[1]

View file

@ -0,0 +1,615 @@
perf-script-python(1)
====================
NAME
----
perf-script-python - Process trace data with a Python script
SYNOPSIS
--------
[verse]
'perf script' [-s [Python]:script[.py] ]
DESCRIPTION
-----------
This perf script option is used to process perf script data using perf's
built-in Python interpreter. It reads and processes the input file and
displays the results of the trace analysis implemented in the given
Python script, if any.
A QUICK EXAMPLE
---------------
This section shows the process, start to finish, of creating a working
Python script that aggregates and extracts useful information from a
raw perf script stream. You can avoid reading the rest of this
document if an example is enough for you; the rest of the document
provides more details on each step and lists the library functions
available to script writers.
This example actually details the steps that were used to create the
'syscall-counts' script you see when you list the available perf script
scripts via 'perf script -l'. As such, this script also shows how to
integrate your script into the list of general-purpose 'perf script'
scripts listed by that command.
The syscall-counts script is a simple script, but demonstrates all the
basic ideas necessary to create a useful script. Here's an example
of its output (syscall names are not yet supported, they will appear
as numbers):
----
syscall events:
event count
---------------------------------------- -----------
sys_write 455067
sys_getdents 4072
sys_close 3037
sys_swapoff 1769
sys_read 923
sys_sched_setparam 826
sys_open 331
sys_newfstat 326
sys_mmap 217
sys_munmap 216
sys_futex 141
sys_select 102
sys_poll 84
sys_setitimer 12
sys_writev 8
15 8
sys_lseek 7
sys_rt_sigprocmask 6
sys_wait4 3
sys_ioctl 3
sys_set_robust_list 1
sys_exit 1
56 1
sys_access 1
----
Basically our task is to keep a per-syscall tally that gets updated
every time a system call occurs in the system. Our script will do
that, but first we need to record the data that will be processed by
that script. Theoretically, there are a couple of ways we could do
that:
- we could enable every event under the tracing/events/syscalls
directory, but this is over 600 syscalls, well beyond the number
allowable by perf. These individual syscall events will however be
useful if we want to later use the guidance we get from the
general-purpose scripts to drill down and get more detail about
individual syscalls of interest.
- we can enable the sys_enter and/or sys_exit syscalls found under
tracing/events/raw_syscalls. These are called for all syscalls; the
'id' field can be used to distinguish between individual syscall
numbers.
For this script, we only need to know that a syscall was entered; we
don't care how it exited, so we'll use 'perf record' to record only
the sys_enter events:
----
# perf record -a -e raw_syscalls:sys_enter
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
----
The options basically say to collect data for every syscall event
system-wide and multiplex the per-cpu output into a single stream.
That single stream will be recorded in a file in the current directory
called perf.data.
Once we have a perf.data file containing our data, we can use the -g
'perf script' option to generate a Python script that will contain a
callback handler for each event type found in the perf.data trace
stream (for more details, see the STARTER SCRIPTS section).
----
# perf script -g python
generated Python script: perf-script.py
The output file created also in the current directory is named
perf-script.py. Here's the file in its entirety:
# perf script event handlers, generated by perf script -g python
# Licensed under the terms of the GNU GPL License version 2
# The common_* event handler fields are the most useful fields common to
# all events. They don't necessarily correspond to the 'common_*' fields
# in the format files. Those fields not available as handler params can
# be retrieved using Python functions of the form common_*(context).
# See the perf-script-python Documentation for the list of available functions.
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
def trace_begin():
print "in trace_begin"
def trace_end():
print "in trace_end"
def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, args):
print_header(event_name, common_cpu, common_secs, common_nsecs,
common_pid, common_comm)
print "id=%d, args=%s\n" % \
(id, args),
def trace_unhandled(event_name, context, event_fields_dict):
print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
def print_header(event_name, cpu, secs, nsecs, pid, comm):
print "%-20s %5u %05u.%09u %8u %-20s " % \
(event_name, cpu, secs, nsecs, pid, comm),
----
At the top is a comment block followed by some import statements and a
path append which every perf script script should include.
Following that are a couple generated functions, trace_begin() and
trace_end(), which are called at the beginning and the end of the
script respectively (for more details, see the SCRIPT_LAYOUT section
below).
Following those are the 'event handler' functions generated one for
every event in the 'perf record' output. The handler functions take
the form subsystem__event_name, and contain named parameters, one for
each field in the event; in this case, there's only one event,
raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for
more info on event handlers).
The final couple of functions are, like the begin and end functions,
generated for every script. The first, trace_unhandled(), is called
every time the script finds an event in the perf.data file that
doesn't correspond to any event handler in the script. This could
mean either that the record step recorded event types that it wasn't
really interested in, or the script was run against a trace file that
doesn't correspond to the script.
The script generated by -g option simply prints a line for each
event found in the trace stream i.e. it basically just dumps the event
and its parameter values to stdout. The print_header() function is
simply a utility function used for that purpose. Let's rename the
script and run it to see the default output:
----
# mv perf-script.py syscall-counts.py
# perf script -s syscall-counts.py
raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args=
raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args=
raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args=
raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args=
raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args=
raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args=
.
.
.
----
Of course, for this script, we're not interested in printing every
trace event, but rather aggregating it in a useful way. So we'll get
rid of everything to do with printing as well as the trace_begin() and
trace_unhandled() functions, which we won't be using. That leaves us
with this minimalistic skeleton:
----
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
def trace_end():
print "in trace_end"
def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, args):
----
In trace_end(), we'll simply print the results, but first we need to
generate some results to print. To do that we need to have our
sys_enter() handler do the necessary tallying until all events have
been counted. A hash table indexed by syscall id is a good way to
store that information; every time the sys_enter() handler is called,
we simply increment a count associated with that hash entry indexed by
that syscall id:
----
syscalls = autodict()
try:
syscalls[id] += 1
except TypeError:
syscalls[id] = 1
----
The syscalls 'autodict' object is a special kind of Python dictionary
(implemented in Core.py) that implements Perl's 'autovivifying' hashes
in Python i.e. with autovivifying hashes, you can assign nested hash
values without having to go to the trouble of creating intermediate
levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create
the intermediate hash levels and finally assign the value 1 to the
hash entry for 'id' (because the value being assigned isn't a hash
object itself, the initial value is assigned in the TypeError
exception. Well, there may be a better way to do this in Python but
that's what works for now).
Putting that code into the raw_syscalls__sys_enter() handler, we
effectively end up with a single-level dictionary keyed on syscall id
and having the counts we've tallied as values.
The print_syscall_totals() function iterates over the entries in the
dictionary and displays a line for each entry containing the syscall
name (the dictionary keys contain the syscall ids, which are passed to
the Util function syscall_name(), which translates the raw syscall
numbers to the corresponding syscall name strings). The output is
displayed after all the events in the trace have been processed, by
calling the print_syscall_totals() function from the trace_end()
handler called at the end of script processing.
The final script producing the output shown above is shown in its
entirety below (syscall_name() helper is not yet available, you can
only deal with id's for now):
----
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
from Util import *
syscalls = autodict()
def trace_end():
print_syscall_totals()
def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, args):
try:
syscalls[id] += 1
except TypeError:
syscalls[id] = 1
def print_syscall_totals():
if for_comm is not None:
print "\nsyscall events for %s:\n\n" % (for_comm),
else:
print "\nsyscall events:\n\n",
print "%-40s %10s\n" % ("event", "count"),
print "%-40s %10s\n" % ("----------------------------------------", \
"-----------"),
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True):
print "%-40s %10d\n" % (syscall_name(id), val),
----
The script can be run just as before:
# perf script -s syscall-counts.py
So those are the essential steps in writing and running a script. The
process can be generalized to any tracepoint or set of tracepoints
you're interested in - basically find the tracepoint(s) you're
interested in by looking at the list of available events shown by
'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
detailed event and field info, record the corresponding trace data
using 'perf record', passing it the list of interesting events,
generate a skeleton script using 'perf script -g python' and modify the
code to aggregate and display it for your particular needs.
After you've done that you may end up with a general-purpose script
that you want to keep around and have available for future use. By
writing a couple of very simple shell scripts and putting them in the
right place, you can have your script listed alongside the other
scripts listed by the 'perf script -l' command e.g.:
----
# perf script -l
List of available trace scripts:
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
----
A nice side effect of doing this is that you also then capture the
probably lengthy 'perf record' command needed to record the events for
the script.
To have the script appear as a 'built-in' script, you write two simple
scripts, one for recording and one for 'reporting'.
The 'record' script is a shell script with the same base name as your
script, but with -record appended. The shell script should be put
into the perf/scripts/python/bin directory in the kernel source tree.
In that script, you write the 'perf record' command-line needed for
your script:
----
# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
#!/bin/bash
perf record -a -e raw_syscalls:sys_enter
----
The 'report' script is also a shell script with the same base name as
your script, but with -report appended. It should also be located in
the perf/scripts/python/bin directory. In that script, you write the
'perf script -s' command-line needed for running your script:
----
# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
#!/bin/bash
# description: system-wide syscall counts
perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
----
Note that the location of the Python script given in the shell script
is in the libexec/perf-core/scripts/python directory - this is where
the script will be copied by 'make install' when you install perf.
For the installation to install your script there, your script needs
to be located in the perf/scripts/python directory in the kernel
source tree:
----
# ls -al kernel-source/tools/perf/scripts/python
total 32
drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
----
Once you've done that (don't forget to do a new 'make install',
otherwise your script won't show up at run-time), 'perf script -l'
should show a new entry for your script:
----
# perf script -l
List of available trace scripts:
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
syscall-counts system-wide syscall counts
----
You can now perform the record step via 'perf script record':
# perf script record syscall-counts
and display the output using 'perf script report':
# perf script report syscall-counts
STARTER SCRIPTS
---------------
You can quickly get started writing a script for a particular set of
trace data by generating a skeleton script using 'perf script -g
python' in the same directory as an existing perf.data trace file.
That will generate a starter script containing a handler for each of
the event types in the trace file; it simply prints every available
field for each event in the trace file.
You can also look at the existing scripts in
~/libexec/perf-core/scripts/python for typical examples showing how to
do basic things like aggregate event data, print results, etc. Also,
the check-perf-script.py script, while not interesting for its results,
attempts to exercise all of the main scripting features.
EVENT HANDLERS
--------------
When perf script is invoked using a trace script, a user-defined
'handler function' is called for each event in the trace. If there's
no handler function defined for a given event type, the event is
ignored (or passed to a 'trace_unhandled' function, see below) and the
next event is processed.
Most of the event's field values are passed as arguments to the
handler function; some of the less common ones aren't - those are
available as calls back into the perf executable (see below).
As an example, the following perf record command can be used to record
all sched_wakeup events in the system:
# perf record -a -e sched:sched_wakeup
Traces meant to be processed using a script should be recorded with
the above option: -a to enable system-wide collection.
The format file for the sched_wakep event defines the following fields
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
----
format:
field:unsigned short common_type;
field:unsigned char common_flags;
field:unsigned char common_preempt_count;
field:int common_pid;
field:char comm[TASK_COMM_LEN];
field:pid_t pid;
field:int prio;
field:int success;
field:int target_cpu;
----
The handler function for this event would be defined as:
----
def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
common_nsecs, common_pid, common_comm,
comm, pid, prio, success, target_cpu):
pass
----
The handler function takes the form subsystem__event_name.
The common_* arguments in the handler's argument list are the set of
arguments passed to all event handlers; some of the fields correspond
to the common_* fields in the format file, but some are synthesized,
and some of the common_* fields aren't common enough to to be passed
to every event as arguments but are available as library functions.
Here's a brief description of each of the invariant event args:
event_name the name of the event as text
context an opaque 'cookie' used in calls back into perf
common_cpu the cpu the event occurred on
common_secs the secs portion of the event timestamp
common_nsecs the nsecs portion of the event timestamp
common_pid the pid of the current task
common_comm the name of the current process
All of the remaining fields in the event's format file have
counterparts as handler function arguments of the same name, as can be
seen in the example above.
The above provides the basics needed to directly access every field of
every event in a trace, which covers 90% of what you need to know to
write a useful trace script. The sections below cover the rest.
SCRIPT LAYOUT
-------------
Every perf script Python script should start by setting up a Python
module search path and 'import'ing a few support modules (see module
descriptions below):
----
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
----
The rest of the script can contain handler functions and support
functions in any order.
Aside from the event handler functions discussed above, every script
can implement a set of optional functions:
*trace_begin*, if defined, is called before any event is processed and
gives scripts a chance to do setup tasks:
----
def trace_begin():
pass
----
*trace_end*, if defined, is called after all events have been
processed and gives scripts a chance to do end-of-script tasks, such
as display results:
----
def trace_end():
pass
----
*trace_unhandled*, if defined, is called after for any event that
doesn't have a handler explicitly defined for it. The standard set
of common arguments are passed into it:
----
def trace_unhandled(event_name, context, event_fields_dict):
pass
----
The remaining sections provide descriptions of each of the available
built-in perf script Python modules and their associated functions.
AVAILABLE MODULES AND FUNCTIONS
-------------------------------
The following sections describe the functions and variables available
via the various perf script Python modules. To use the functions and
variables from the given module, add the corresponding 'from XXXX
import' line to your perf script script.
Core.py Module
~~~~~~~~~~~~~~
These functions provide some essential functions to user scripts.
The *flag_str* and *symbol_str* functions provide human-readable
strings for flag and symbolic fields. These correspond to the strings
and values parsed from the 'print fmt' fields of the event format
files:
flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name
symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name
The *autodict* function returns a special kind of Python
dictionary that implements Perl's 'autovivifying' hashes in Python
i.e. with autovivifying hashes, you can assign nested hash values
without having to go to the trouble of creating intermediate levels if
they don't exist.
autodict() - returns an autovivifying dictionary instance
perf_trace_context Module
~~~~~~~~~~~~~~~~~~~~~~~~~
Some of the 'common' fields in the event format file aren't all that
common, but need to be made accessible to user scripts nonetheless.
perf_trace_context defines a set of functions that can be used to
access this data in the context of the current event. Each of these
functions expects a context variable, which is the same as the
context variable passed into every event handler as the second
argument.
common_pc(context) - returns common_preempt count for the current event
common_flags(context) - returns common_flags for the current event
common_lock_depth(context) - returns common_lock_depth for the current event
Util.py Module
~~~~~~~~~~~~~~
Various utility functions for use with perf script:
nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
nsecs_secs(nsecs) - returns whole secs portion given nsecs
nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
nsecs_str(nsecs) - returns printable string in the form secs.nsecs
avg(total, n) - returns average given a sum and a total number of values
SEE ALSO
--------
linkperf:perf-script[1]

View file

@ -0,0 +1,389 @@
perf-script(1)
=============
NAME
----
perf-script - Read perf.data (created by perf record) and display trace output
SYNOPSIS
--------
[verse]
'perf script' [<options>]
'perf script' [<options>] record <script> [<record-options>] <command>
'perf script' [<options>] report <script> [script-args]
'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
'perf script' [<options>] <top-script> [script-args]
DESCRIPTION
-----------
This command reads the input file and displays the trace recorded.
There are several variants of perf script:
'perf script' to see a detailed trace of the workload that was
recorded.
You can also run a set of pre-canned scripts that aggregate and
summarize the raw trace data in various ways (the list of scripts is
available via 'perf script -l'). The following variants allow you to
record and run those scripts:
'perf script record <script> <command>' to record the events required
for 'perf script report'. <script> is the name displayed in the
output of 'perf script --list' i.e. the actual script name minus any
language extension. If <command> is not specified, the events are
recorded using the -a (system-wide) 'perf record' option.
'perf script report <script> [args]' to run and display the results
of <script>. <script> is the name displayed in the output of 'perf
script --list' i.e. the actual script name minus any language
extension. The perf.data output from a previous run of 'perf script
record <script>' is used and should be present for this command to
succeed. [args] refers to the (mainly optional) args expected by
the script.
'perf script <script> <required-script-args> <command>' to both
record the events required for <script> and to run the <script>
using 'live-mode' i.e. without writing anything to disk. <script>
is the name displayed in the output of 'perf script --list' i.e. the
actual script name minus any language extension. If <command> is
not specified, the events are recorded using the -a (system-wide)
'perf record' option. If <script> has any required args, they
should be specified before <command>. This mode doesn't allow for
optional script args to be specified; if optional script args are
desired, they can be specified using separate 'perf script record'
and 'perf script report' commands, with the stdout of the record step
piped to the stdin of the report script, using the '-o -' and '-i -'
options of the corresponding commands.
'perf script <top-script>' to both record the events required for
<top-script> and to run the <top-script> using 'live-mode'
i.e. without writing anything to disk. <top-script> is the name
displayed in the output of 'perf script --list' i.e. the actual
script name minus any language extension; a <top-script> is defined
as any script name ending with the string 'top'.
[<record-options>] can be passed to the record steps of 'perf script
record' and 'live-mode' variants; this isn't possible however for
<top-script> 'live-mode' or 'perf script report' variants.
See the 'SEE ALSO' section for links to language-specific
information on how to write and run your own trace scripts.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-D::
--dump-raw-trace=::
Display verbose dump of the trace data.
-L::
--Latency=::
Show latency attributes (irqs/preemption disabled, etc).
-l::
--list=::
Display a list of available trace scripts.
-s ['lang']::
--script=::
Process trace data with the given script ([lang]:script[.ext]).
If the string 'lang' is specified in place of a script name, a
list of supported languages will be displayed instead.
-g::
--gen-script=::
Generate perf-script.[ext] starter script for given language,
using current perf.data.
-a::
Force system-wide collection. Scripts run without a <command>
normally use -a by default, while scripts run with a <command>
normally don't - this option allows the latter to be run in
system-wide mode.
-i::
--input=::
Input file name. (default: perf.data unless stdin is a fifo)
-d::
--debug-mode::
Do various checks like samples ordering and lost events.
-F::
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
perf script -F <fields>
is equivalent to:
perf script -F trace:<fields> -F sw:<fields> -F hw:<fields>
i.e., the specified fields apply to all event types if the type string
is not given.
In addition to overriding fields, it is also possible to add or remove
fields from the defaults. For example
-F -cpu,+insn
removes the cpu field and adds the insn field. Adding/removing fields
cannot be mixed with normal overriding.
The arguments are processed in the order received. A later usage can
reset a prior request. e.g.:
-F trace: -F comm,tid,time,ip,sym
The first -F suppresses trace events (field list is ""), but then the
second invocation sets the fields to comm,tid,time,ip,sym. In this case a
warning is given to the user:
"Overriding previous field request for all events."
Alternatively, consider the order:
-F comm,tid,time,ip,sym -F trace:
The first -F sets the fields for all events and the second -F
suppresses trace events. The user is given a warning message about
the override, and the result of the above is that only S/W and H/W
events are displayed with the given fields.
For the 'wildcard' option if a user selected field is invalid for an
event type, a message is displayed to the user that the option is
ignored for that type. For example:
$ perf script -F comm,tid,trace
'trace' not valid for hardware events. Ignoring.
'trace' not valid for software events. Ignoring.
Alternatively, if the type is given an invalid field is specified it
is an error. For example:
perf script -v -F sw:comm,tid,trace
'trace' not valid for software events.
At this point usage is displayed, and perf-script exits.
The flags field is synthesized and may have a value when Instruction
Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
call, return, conditional, system, asynchronous, interrupt,
transaction abort, trace begin, trace end, and in transaction,
respectively. Known combinations of flags are printed more nicely e.g.
"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
"tr end" for "bE". However the "x" flag will be display separately in those
cases e.g. "jcc (x)" for a condition branch within a transaction.
The callindent field is synthesized and may have a value when
Instruction Trace decoding. For calls and returns, it will display the
name of the symbol indented with spaces to reflect the stack depth.
When doing instruction trace decoding insn and insnlen give the
instruction bytes and the instruction length of the current
instruction.
The synth field is used by synthesized events which may be created when
Instruction Trace decoding.
Finally, a user may not set fields to none for all event types.
i.e., -F "" is not allowed.
The brstack output includes branch related information with raw addresses using the
/v/v/v/v/cycles syntax in the following order:
FROM: branch source instruction
TO : branch target instruction
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
A/- : A=TSX abort entry, -=not aborted region or not supported
cycles
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
When brstackinsn is specified the full assembler sequences of branch sequences for each sample
is printed. This is the full execution path leading to the sample. This is only supported when the
sample was recorded with perf record -b or -j any.
The brstackoff field will print an offset into a specific dso/binary.
With the metric option perf script can compute metrics for
sampling periods, similar to perf stat. This requires
specifying a group with multiple metrics with the :S option
for perf record. perf will sample on the first event, and
compute metrics for all the events in the group. Please note
that the metric computed is averaged over the whole sampling
period, not just for the sample point.
For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
PERF_RECORD_MISC_KERNEL K
PERF_RECORD_MISC_USER U
PERF_RECORD_MISC_HYPERVISOR H
PERF_RECORD_MISC_GUEST_KERNEL G
PERF_RECORD_MISC_GUEST_USER g
PERF_RECORD_MISC_MMAP_DATA* M
PERF_RECORD_MISC_COMM_EXEC E
PERF_RECORD_MISC_SWITCH_OUT S
PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp
$ perf script -F +misc ...
sched-messaging 1414 K 28690.636582: 4590 cycles ...
sched-messaging 1407 U 28690.636600: 325620 cycles ...
sched-messaging 1414 K 28690.636608: 19473 cycles ...
misc field ___________/
-k::
--vmlinux=<file>::
vmlinux pathname
--kallsyms=<file>::
kallsyms pathname
--symfs=<directory>::
Look for files with symbols relative to this directory.
-G::
--hide-call-graph::
When printing symbols do not display call chain.
--stop-bt::
Stop display of callgraph at these symbols
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
-c::
--comms=::
Only display events for these comms. CSV that understands
file://filename entries.
--pid=::
Only show events for given process ID (comma separated list).
--tid=::
Only show events for given thread ID (comma separated list).
-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
It can only be used with the perf script report mode.
--show-kernel-path::
Try to resolve the path of [kernel.kallsyms]
--show-task-events
Display task related events (e.g. FORK, COMM, EXIT).
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
--show-namespace-events
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
--show-lost-events
Display lost events i.e. events of type PERF_RECORD_LOST.
--show-round-events
Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
--demangle-kernel::
Demangle kernel symbol names to human readable form (for C++ kernels).
--header
Show perf.data header.
--header-only
Show only perf.data header.
--itrace::
Options for decoding instruction tracing data. The options are:
include::itrace.txt[]
To disable decoding entirely, use --no-itrace.
--full-source-path::
Show the full path for source files for srcline output.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
between information loss and faster processing especially for
workloads that can have a very long callchain stack.
Note that when using the --itrace option the synthesized callchain size
will override this value if the synthesized callchain size is bigger.
Default: 127
--ns::
Use 9 decimal places when displaying time (i.e. show the nanoseconds)
-f::
--force::
Don't do ownership validation.
--time::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
Also support time percent with multipe time range. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
For example:
Select the second 10% time slice:
perf script --time 10%/2
Select from 0% to 10% time slice:
perf script --time 0%-10%
Select the first and second 10% time slices:
perf script --time 10%/1,10%/2
Select from 0% to 10% and 30% to 40% slices:
perf script --time 0%-10%,30%-40%
--max-blocks::
Set the maximum number of program blocks to print with brstackasm for
each sample.
--per-event-dump::
Create per event files with a "perf.data.EVENT.dump" name instead of
printing to stdout, useful, for instance, for generating flamegraphs.
--inline::
If a callgraph address belongs to an inlined function, the inline stack
will be printed. Each entry has function name and file/line. Enabled by
default, disable with --no-inline.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
linkperf:perf-script-python[1]

337
Documentation/perf-stat.txt Normal file
View file

@ -0,0 +1,337 @@
perf-stat(1)
============
NAME
----
perf-stat - Run a command and gather performance counter statistics
SYNOPSIS
--------
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
'perf stat' report [-i file]
DESCRIPTION
-----------
This command runs a command and gathers performance counter statistics
from it.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
record::
See STAT RECORD.
report::
See STAT REPORT.
-e::
--event=::
Select the PMU event. Selection can be:
- a symbolic event name (use 'perf list' to list all events)
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- a symbolically formed event like 'pmu/param1=0x3,param2/' where
param1 and param2 are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*
- a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
where M, N, K are numbers (in decimal, hex, octal format).
Acceptable values for each of 'config', 'config1' and 'config2'
parameters are defined by corresponding entries in
/sys/bus/event_source/devices/<pmu>/format/*
Note that the last two syntaxes support prefix and glob matching in
the PMU name to simplify creation of events accross multiple instances
of the same type of PMU in large systems (e.g. memory controller PMUs).
Multiple PMU instances are typical for uncore PMUs, so the prefix
'uncore_' is also ignored when performing this match.
-i::
--no-inherit::
child tasks do not inherit counters
-p::
--pid=<pid>::
stat events on existing process id (comma separated list)
-t::
--tid=<tid>::
stat events on existing thread id (comma separated list)
-a::
--all-cpus::
system-wide collection from all CPUs (default if no target is specified)
-c::
--scale::
scale/normalize counter values
-d::
--detailed::
print more detailed statistics, can be specified up to 3 times
-d: detailed events, L1 and LLC data cache
-d -d: more detailed events, dTLB and iTLB events
-d -d -d: very detailed events, adding prefetch events
-r::
--repeat=<n>::
repeat command and print average + stddev (max: 100). 0 means forever.
-B::
--big-num::
print large numbers with thousands' separators according to locale
-C::
--cpu=::
Count only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode, this option is ignored. The -a option is still necessary
to activate system-wide monitoring. Default is to count on all CPUs.
-A::
--no-aggr::
Do not aggregate counts across all monitored CPUs.
-n::
--null::
null run - don't start any counters
-v::
--verbose::
be more verbose (show counter open errors, etc)
-x SEP::
--field-separator SEP::
print counts using a CSV-style output to make it easy to import directly into
spreadsheets. Columns are separated by the string specified in SEP.
-G name::
--cgroup name::
monitor only in the container (cgroup) called "name". This option is available only
in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
to first event, second cgroup to second event and so on. It is possible to provide
an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
corresponding events, i.e., they always refer to events defined earlier on the command
line. If the user wants to track multiple events for a specific cgroup, the user can
use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
-o file::
--output file::
Print the output into the designated file.
--append::
Append to the output file designated with the -o option. Ignored if -o is not specified.
--log-fd::
Log output to fd, instead of stderr. Complementary to --output, and mutually exclusive
with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
--pre::
--post::
Pre and post measurement hooks, e.g.:
perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
-I msecs::
--interval-print msecs::
Print count deltas every N milliseconds (minimum: 1ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
--interval-count times::
Print count deltas for fixed number of times.
This option should be used together with "-I" option.
example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
--timeout msecs::
Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
This option is not supported with the "-I" option.
example: 'perf stat --time 2000 -e cycles -a'
--metric-only::
Only print computed metrics. Print them in a single line.
Don't show any raw values. Not supported with --per-thread.
--per-socket::
Aggregate counts per processor socket for system-wide mode measurements. This
is a useful mode to detect imbalance between sockets. To enable this mode,
use --per-socket in addition to -a. (system-wide). The output includes the
socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation.
--per-core::
Aggregate counts per physical processor for system-wide mode measurements. This
is a useful mode to detect imbalance between physical cores. To enable this mode,
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
--per-thread::
Aggregate counts per monitored threads, when monitoring threads (-t option)
or processes (-p option).
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different.
-T::
--transaction::
Print statistics of transactional execution if supported.
STAT RECORD
-----------
Stores stat data into perf data file.
-o file::
--output file::
Output file name.
STAT REPORT
-----------
Reads and reports stat data from perf data file.
-i file::
--input file::
Input file name.
--per-socket::
Aggregate counts per processor socket for system-wide mode measurements.
--per-core::
Aggregate counts per physical processor for system-wide mode measurements.
-M::
--metrics::
Print metrics or metricgroups specified in a comma separated list.
For a group all metrics from the group are added.
The events from the metrics are automatically measured.
See perf list output for the possble metrics and metricgroups.
-A::
--no-aggr::
Do not aggregate counts across all monitored CPUs.
--topdown::
Print top down level 1 metrics if supported by the CPU. This allows to
determine bottle necks in the CPU pipeline for CPU bound workloads,
by breaking the cycles consumed down into frontend bound, backend bound,
bad speculation and retiring.
Frontend bound means that the CPU cannot fetch and decode instructions fast
enough. Backend bound means that computation or memory access is the bottle
neck. Bad Speculation means that the CPU wasted cycles due to branch
mispredictions and similar issues. Retiring means that the CPU computed without
an apparently bottleneck. The bottleneck is only the real bottleneck
if the workload is actually bound by the CPU and not by something else.
For best results it is usually a good idea to use it with interval
mode like -I 1000, as the bottleneck of workloads can change often.
The top down metrics are collected per core instead of per
CPU thread. Per core mode is automatically enabled
and -a (global monitoring) is needed, requiring root rights or
perf.perf_event_paranoid=-1.
Topdown uses the full Performance Monitoring Unit, and needs
disabling of the NMI watchdog (as root):
echo 0 > /proc/sys/kernel/nmi_watchdog
for best results. Otherwise the bottlenecks may be inconsistent
on workload with changing phases.
This enables --metric-only, unless overriden with --no-metric-only.
To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
--no-merge::
Do not merge results from same PMUs.
When multiple events are created from a single event specification,
stat will, by default, aggregate the event counts and show the result
in a single row. This option disables that behavior and shows
the individual events and counts.
Multiple events are created from a single event specification when:
1. Prefix or glob matching is used for the PMU name.
2. Aliases, which are listed immediately after the Kernel PMU events
by perf list, are used.
--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
During the measurement, the /sys/device/cpu/freeze_on_smi will be set to
freeze core counters on SMI.
The aperf counter will not be effected by the setting.
The cost of SMI can be measured by (aperf - unhalted core cycles).
In practice, the percentages of SMI cycles is very useful for performance
oriented analysis. --metric_only will be applied by default.
The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
Users who wants to get the actual value can apply --no-metric-only.
EXAMPLES
--------
$ perf stat -- make -j
Performance counter stats for 'make -j':
8117.370256 task clock ticks # 11.281 CPU utilization factor
678 context switches # 0.000 M/sec
133 CPU migrations # 0.000 M/sec
235724 pagefaults # 0.029 M/sec
24821162526 CPU cycles # 3057.784 M/sec
18687303457 instructions # 2302.138 M/sec
172158895 cache references # 21.209 M/sec
27075259 cache misses # 3.335 M/sec
Wall-clock time elapsed: 719.554352 msecs
CSV FORMAT
----------
With -x, perf stat is able to output a not-quite-CSV format output
Commas in the output are not put into "". To make it easy to parse
it is recommended to use a different character like -x \;
The fields are in this order:
- optional usec time stamp in fractions of second (with -I xxx)
- optional CPU, core, or socket identifier
- optional number of logical CPUs aggregated
- counter value
- unit of the counter value or empty
- event name
- run time of counter
- percentage of measurement time the counter was running
- optional variance if multiple values are collected with -r
- optional metric value
- optional unit of metric
Additional metrics may be printed with all earlier fields being empty.
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]

View file

@ -0,0 +1,36 @@
perf-test(1)
============
NAME
----
perf-test - Runs sanity tests.
SYNOPSIS
--------
[verse]
'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
DESCRIPTION
-----------
This command does assorted sanity tests, initially through linked routines but
also will look for a directory with more tests in the form of scripts.
To get a list of available tests use 'perf test list', specifying a test name
fragment will show all tests that have it.
To run just specific tests, inform test name fragments or the numbers obtained
from 'perf test list'.
OPTIONS
-------
-s::
--skip::
Tests to skip (comma separated numeric list).
-v::
--verbose::
Be more verbose.
-F::
--dont-fork::
Do not fork child for each test, run all tests within single process.

View file

@ -0,0 +1,128 @@
perf-timechart(1)
=================
NAME
----
perf-timechart - Tool to visualize total system behavior during a workload
SYNOPSIS
--------
[verse]
'perf timechart' [<timechart options>] {record} [<record options>]
DESCRIPTION
-----------
There are two variants of perf timechart:
'perf timechart record <command>' to record the system level events
of an arbitrary workload. By default timechart records only scheduler
and CPU events (task switches, running times, CPU power states, etc),
but it's possible to record IO (disk, network) activity using -I argument.
'perf timechart' to turn a trace into a Scalable Vector Graphics file,
that can be viewed with popular SVG viewers such as 'Inkscape'. Depending
on the events in the perf.data file, timechart will contain scheduler/cpu
events or IO events.
In IO mode, every bar has two charts: upper and lower.
Upper bar shows incoming events (disk reads, ingress network packets).
Lower bar shows outgoing events (disk writes, egress network packets).
There are also poll bars which show how much time application spent
in poll/epoll/select syscalls.
TIMECHART OPTIONS
-----------------
-o::
--output=::
Select the output file (default: output.svg)
-i::
--input=::
Select the input file (default: perf.data unless stdin is a fifo)
-w::
--width=::
Select the width of the SVG file (default: 1000)
-P::
--power-only::
Only output the CPU power section of the diagram
-T::
--tasks-only::
Don't output processor state transitions
-p::
--process::
Select the processes to display, by name or PID
-f::
--force::
Don't complain, do it.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-n::
--proc-num::
Print task info for at least given number of tasks.
-t::
--topology::
Sort CPUs according to topology.
--highlight=<duration_nsecs|task_name>::
Highlight tasks (using different color) that run more than given
duration or tasks with given name. If number is given it's interpreted
as number of nanoseconds. If non-numeric string is given it's
interpreted as task name.
--io-skip-eagain::
Don't draw EAGAIN IO events.
--io-min-time=<nsecs>::
Draw small events as if they lasted min-time. Useful when you need
to see very small and fast IO. It's possible to specify ms or us
suffix to specify time in milliseconds or microseconds.
Default value is 1ms.
--io-merge-dist=<nsecs>::
Merge events that are merge-dist nanoseconds apart.
Reduces number of figures on the SVG and makes it more render-friendly.
It's possible to specify ms or us suffix to specify time in
milliseconds or microseconds.
Default value is 1us.
RECORD OPTIONS
--------------
-P::
--power-only::
Record only power-related events
-T::
--tasks-only::
Record only tasks-related events
-I::
--io-only::
Record only io-related events
-g::
--callchain::
Do call-graph (stack chain/backtrace) recording
EXAMPLES
--------
$ perf timechart record git pull
[ perf record: Woken up 13 times to write data ]
[ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
$ perf timechart
Written 10.2 seconds of trace to output.svg.
Record system-wide timechart:
$ perf timechart record
then generate timechart and highlight 'gcc' tasks:
$ perf timechart --highlight gcc
Record system-wide IO events:
$ perf timechart record -I
then generate timechart:
$ perf timechart
SEE ALSO
--------
linkperf:perf-record[1]

294
Documentation/perf-top.txt Normal file
View file

@ -0,0 +1,294 @@
perf-top(1)
===========
NAME
----
perf-top - System profiling tool.
SYNOPSIS
--------
[verse]
'perf top' [-e <EVENT> | --event=EVENT] [<options>]
DESCRIPTION
-----------
This command generates and displays a performance counter profile in real time.
OPTIONS
-------
-a::
--all-cpus::
System-wide collection. (default)
-c <count>::
--count=<count>::
Event period to sample.
-C <cpu-list>::
--cpu=<cpu>::
Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
Default is to monitor all CPUS.
-d <seconds>::
--delay=<seconds>::
Number of seconds to delay between refreshes.
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
-E <entries>::
--entries=<entries>::
Display this many functions.
-f <count>::
--count-filter=<count>::
Only display functions with more events than this.
--group::
Put the counters into a counter group.
-F <freq>::
--freq=<freq>::
Profile at this frequency. Use 'max' to use the currently maximum
allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
sysctl.
-i::
--inherit::
Child tasks do not inherit counters.
-k <path>::
--vmlinux=<path>::
Path to vmlinux. Required for annotation functionality.
--ignore-vmlinux::
Ignore vmlinux files.
-m <pages>::
--mmap-pages=<pages>::
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
-p <pid>::
--pid=<pid>::
Profile events on existing Process ID (comma separated list).
-t <tid>::
--tid=<tid>::
Profile events on existing thread ID (comma separated list).
-u::
--uid=::
Record events in threads owned by uid. Name or number.
-r <priority>::
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
--sym-annotate=<symbol>::
Annotate this symbol.
-K::
--hide_kernel_symbols::
Hide kernel symbols.
-U::
--hide_user_symbols::
Hide user symbols.
--demangle-kernel::
Demangle kernel symbols.
-D::
--dump-symtab::
Dump the symbol table used for profiling.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
-z::
--zero::
Zero history across display updates.
-s::
--sort::
Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
local_weight, abort, in_tx, transaction, overhead, sample, period.
Please see description of --sort in the perf-report man page.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, overhead_children, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in --field will be appended
automatically.
-n::
--show-nr-samples::
Show a column with the number of samples.
--show-total-period::
Show a column with the sum of periods.
--dsos::
Only consider symbols in these dsos. This option will affect the
percentage of the overhead column. See --percentage for more info.
--comms::
Only consider symbols in these comms. This option will affect the
percentage of the overhead column. See --percentage for more info.
--symbols::
Only consider these symbols. This option will affect the
percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
--source::
Interleave source code with assembly code. Enabled by default,
disable with --no-source.
--asm-raw::
Show raw instruction encoding of assembly instructions.
-g::
Enables call-graph (stack chain/backtrace) recording.
--call-graph [mode,type,min[,limit],order[,key][,branch]]::
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g. See `--call-graph` section in perf-record and
perf-report man pages for details.
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
enabled. See the `overhead calculation' section for more details.
Enabled by default, disable with --no-children.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
between information loss and faster processing especially for
workloads that can have a very long callchain stack.
Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
--ignore-callees=<regex>::
Ignore callees of the function(s) matching the given regex.
This has the effect of collecting the callers of each such
function into one place in the call-graph tree.
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
-w::
--column-widths=<width[,width...]>::
Force each column width to the provided list, for large terminal
readability. 0 means no limit (default behavior).
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take
a long time, because the file may be huge. A time out is needed
in such cases.
This option sets the time out limit. The default value is 500 ms.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters.
For a full list of modifiers please see the perf record manpage.
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
--raw-trace::
When displaying traceevent output, do not use print fmt or plugins.
--hierarchy::
Enable hierarchy output.
--force::
Don't do ownership validation.
--num-thread-synthesize::
The number of threads to run when synthesizing events for existing processes.
By default, the number of threads equals to the number of online CPUs.
INTERACTIVE PROMPTING KEYS
--------------------------
[d]::
Display refresh delay.
[e]::
Number of entries to display.
[E]::
Event to display when multiple counters are active.
[f]::
Profile display filter (>= hit count).
[F]::
Annotation display filter (>= % of total).
[s]::
Annotate symbol.
[S]::
Stop annotation, return to full profile display.
[K]::
Hide kernel symbols.
[U]::
Hide user symbols.
[z]::
Toggle event count zeroing across display updates.
[qQ]::
Quit.
Pressing any unmapped key displays a menu, and prompts for input.
include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]

View file

@ -0,0 +1,243 @@
perf-trace(1)
=============
NAME
----
perf-trace - strace inspired tool
SYNOPSIS
--------
[verse]
'perf trace'
'perf trace record'
DESCRIPTION
-----------
This command will show the events associated with the target, initially
syscalls, but other system events like pagefaults, task lifetime events,
scheduling events, etc.
This is a live mode tool in addition to working with perf.data files like
the other perf tools. Files can be generated using the 'perf record' command
but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
Alternatively, 'perf trace record' can be used as a shortcut to
automatically include the raw_syscalls events when writing events to a file.
The following options apply to perf trace; options to perf trace record are
found in the perf record man page.
OPTIONS
-------
-a::
--all-cpus::
System-wide collection from all CPUs.
-e::
--expr::
--event::
List of syscalls and other perf events (tracepoints, HW cache events,
etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
See 'perf list' for a complete list of events.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different.
-o::
--output=::
Output file name.
-p::
--pid=::
Record events on existing process ID (comma separated list).
-t::
--tid=::
Record events on existing thread ID (comma separated list).
-u::
--uid=::
Record events in threads owned by uid. Name or number.
-G::
--cgroup::
Record events in threads in a cgroup.
Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
remove the /sys/fs/cgroup/perf_event/ part and try:
perf trace -G A -e sched:*switch
Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
_and_ sched:sched_switch to the 'A' cgroup, while:
perf trace -e sched:*switch -G A
will only set the sched:sched_switch event to the 'A' cgroup, all the
other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
a cgroup (on the root cgroup, sys wide, etc).
Multiple cgroups:
perf trace -G A -e sched:*switch -G B
the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
to the 'B' cgroup.
--filter-pids=::
Filter out events for these pids and for 'trace' itself (comma separated list).
-v::
--verbose=::
Verbosity level.
--no-inherit::
Child tasks do not inherit counters.
-m::
--mmap-pages=::
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
-C::
--cpu::
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode with inheritance mode on (default), Events are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
--duration::
Show only events that had a duration greater than N.M ms.
--sched::
Accrue thread runtime and provide a summary at the end of the session.
--failure::
Show only syscalls that failed, i.e. that returned < 0.
-i::
--input::
Process events from a given perf data file.
-T::
--time::
Print full timestamp rather time relative to first sample.
--comm::
Show process COMM right beside its ID, on by default, disable with --no-comm.
-s::
--summary::
Show only a summary of syscalls by thread with min, max, and average times
(in msec) and relative stddev.
-S::
--with-summary::
Show all syscalls followed by a summary by thread with min, max, and
average times (in msec) and relative stddev.
--tool_stats::
Show tool stats such as number of times fd->pathname was discovered thru
hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
-f::
--force::
Don't complain, do it.
-F=[all|min|maj]::
--pf=[all|min|maj]::
Trace pagefaults. Optionally, you can specify whether you want minor,
major or all pagefaults. Default value is maj.
--syscalls::
Trace system calls. This options is enabled by default, disable with
--no-syscalls.
--call-graph [mode,type,min[,limit],order[,key][,branch]]::
Setup and enable call-graph (stack chain/backtrace) recording.
See `--call-graph` section in perf-record and perf-report
man pages for details. The ones that are most useful in 'perf trace'
are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
Using this will, for the root user, bump the value of --mmap-pages to 4
times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
sysctl. This is done only if the user doesn't specify a --mmap-pages value.
--kernel-syscall-graph::
Show the kernel callchains on the syscall exit path.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. Note that at this point
this is just about the presentation part, i.e. the kernel is still
not limiting, the overhead of callchains needs to be set via the
knobs in --call-graph dwarf.
Implies '--call-graph dwarf' when --call-graph not present on the
command line, on systems where DWARF unwinding was built in.
Default: /proc/sys/kernel/perf_event_max_stack when present for
live sessions (without --input/-i), 127 otherwise.
--min-stack::
Set the stack depth limit when parsing the callchain, anything
below the specified depth will be ignored. Disabled by default.
Implies '--call-graph dwarf' when --call-graph not present on the
command line, on systems where DWARF unwinding was built in.
--print-sample::
Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the
raw_syscalls:sys_{enter,exit} tracepoints, for debugging.
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
This option sets the time out limit. The default value is 500 ms.
PAGEFAULTS
----------
When tracing pagefaults, the format of the trace is as follows:
<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
- min/maj indicates whether fault event is minor or major;
- ip.symbol shows symbol for instruction pointer (the code that generated the
fault); if no debug symbols available, perf trace will print raw IP;
- addr.dso shows DSO for the faulted address;
- map type is either 'd' for non-executable maps or 'x' for executable maps;
- addr level is either 'k' for kernel dso or '.' for user dso.
For symbols resolution you may need to install debugging symbols.
Please be aware that duration is currently always 0 and doesn't reflect actual
time it took for fault to be handled!
When --verbose specified, perf trace tries to print all available information
for both IP and fault address in the form of dso@symbol+offset.
EXAMPLES
--------
Trace only major pagefaults:
$ perf trace --no-syscalls -F
Trace syscalls, major and minor pagefaults:
$ perf trace -F all
1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
As you can see, there was major pagefault in python process, from
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]

View file

@ -0,0 +1,24 @@
perf-version(1)
===============
NAME
----
perf-version - display the version of perf binary
SYNOPSIS
--------
'perf version' [--build-options]
DESCRIPTION
-----------
With no options given, the 'perf version' prints the perf version
on the standard output.
If the option '--build-options' is given, then the status of
compiled-in libraries are printed on the standard output.
OPTIONS
-------
--build-options::
Prints the status of compiled-in libraries on the
standard output.

View file

@ -0,0 +1,489 @@
perf.data format
Uptodate as of v4.7
This document describes the on-disk perf.data format, generated by perf record
or perf inject and consumed by the other perf tools.
On a high level perf.data contains the events generated by the PMUs, plus metadata.
All fields are in native-endian of the machine that generated the perf.data.
When perf is writing to a pipe it uses a special version of the file
format that does not rely on seeking to adjust data offsets. This
format is described in "Pipe-mode data" section. The pipe data version can be
augmented with additional events using perf inject.
The file starts with a perf_header:
struct perf_header {
char magic[8]; /* PERFILE2 */
uint64_t size; /* size of the header */
uint64_t attr_size; /* size of an attribute in attrs */
struct perf_file_section attrs;
struct perf_file_section data;
struct perf_file_section event_types;
uint64_t flags;
uint64_t flags1[3];
};
The magic number identifies the perf file and the version. Current perf versions
use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
is not described here. The magic number also identifies the endian. When the
magic value is 64bit byte swapped compared the file is in non-native
endian.
A perf_file_section contains a pointer to another section of the perf file.
The header contains three such pointers: for attributes, data and event types.
struct perf_file_section {
uint64_t offset; /* offset from start of file */
uint64_t size; /* size of the section */
};
Flags section:
The header is followed by different optional headers, described by the bits set
in flags. Only headers for which the bit is set are included. Each header
consists of a perf_file_section located after the initial header.
The respective perf_file_section points to the data of the additional
header and defines its size.
Some headers consist of strings, which are defined like this:
struct perf_header_string {
uint32_t len;
char string[len]; /* zero terminated */
};
Some headers consist of a sequence of strings, which start with a
struct perf_header_string_list {
uint32_t nr;
struct perf_header_string strings[nr]; /* variable length records */
};
The bits are the flags bits in a 256 bit bitmap starting with
flags. These define the valid bits:
HEADER_RESERVED = 0, /* always cleared */
HEADER_FIRST_FEATURE = 1,
HEADER_TRACING_DATA = 1,
Describe me.
HEADER_BUILD_ID = 2,
The header consists of an sequence of build_id_event. The size of each record
is defined by header.size (see perf_event.h). Each event defines a ELF build id
for a executable file name for a pid. An ELF build id is a unique identifier
assigned by the linker to an executable.
struct build_id_event {
struct perf_event_header header;
pid_t pid;
uint8_t build_id[24];
char filename[header.size - offsetof(struct build_id_event, filename)];
};
HEADER_HOSTNAME = 3,
A perf_header_string with the hostname where the data was collected
(uname -n)
HEADER_OSRELEASE = 4,
A perf_header_string with the os release where the data was collected
(uname -r)
HEADER_VERSION = 5,
A perf_header_string with the perf user tool version where the
data was collected. This is the same as the version of the source tree
the perf tool was built from.
HEADER_ARCH = 6,
A perf_header_string with the CPU architecture (uname -m)
HEADER_NRCPUS = 7,
A structure defining the number of CPUs.
struct nr_cpus {
uint32_t nr_cpus_online;
uint32_t nr_cpus_available; /* CPUs not yet onlined */
};
HEADER_CPUDESC = 8,
A perf_header_string with description of the CPU. On x86 this is the model name
in /proc/cpuinfo
HEADER_CPUID = 9,
A perf_header_string with the exact CPU type. On x86 this is
vendor,family,model,stepping. For example: GenuineIntel,6,69,1
HEADER_TOTAL_MEM = 10,
An uint64_t with the total memory in bytes.
HEADER_CMDLINE = 11,
A perf_header_string with the perf command line used to collect the data.
HEADER_EVENT_DESC = 12,
Another description of the perf_event_attrs, more detailed than header.attrs
including IDs and names. See perf_event.h or the man page for a description
of a struct perf_event_attr.
struct {
uint32_t nr; /* number of events */
uint32_t attr_size; /* size of each perf_event_attr */
struct {
struct perf_event_attr attr; /* size of attr_size */
uint32_t nr_ids;
struct perf_header_string event_string;
uint64_t ids[nr_ids];
} events[nr]; /* Variable length records */
};
HEADER_CPU_TOPOLOGY = 13,
String lists defining the core and CPU threads topology.
struct {
struct perf_header_string_list cores; /* Variable length */
struct perf_header_string_list threads; /* Variable length */
};
Example:
sibling cores : 0-3
sibling threads : 0-1
sibling threads : 2-3
HEADER_NUMA_TOPOLOGY = 14,
A list of NUMA node descriptions
struct {
uint32_t nr;
struct {
uint32_t nodenr;
uint64_t mem_total;
uint64_t mem_free;
struct perf_header_string cpus;
} nodes[nr]; /* Variable length records */
};
HEADER_BRANCH_STACK = 15,
Not implemented in perf.
HEADER_PMU_MAPPINGS = 16,
A list of PMU structures, defining the different PMUs supported by perf.
struct {
uint32_t nr;
struct pmu {
uint32_t pmu_type;
struct perf_header_string pmu_name;
} [nr]; /* Variable length records */
};
HEADER_GROUP_DESC = 17,
Description of counter groups ({...} in perf syntax)
struct {
uint32_t nr;
struct {
struct perf_header_string string;
uint32_t leader_idx;
uint32_t nr_members;
} [nr]; /* Variable length records */
};
HEADER_AUXTRACE = 18,
Define additional auxtrace areas in the perf.data. auxtrace is used to store
undecoded hardware tracing information, such as Intel Processor Trace data.
/**
* struct auxtrace_index_entry - indexes a AUX area tracing event within a
* perf.data file.
* @file_offset: offset within the perf.data file
* @sz: size of the event
*/
struct auxtrace_index_entry {
u64 file_offset;
u64 sz;
};
#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
/**
* struct auxtrace_index - index of AUX area tracing events within a perf.data
* file.
* @list: linking a number of arrays of entries
* @nr: number of entries
* @entries: array of entries
*/
struct auxtrace_index {
struct list_head list;
size_t nr;
struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
};
HEADER_STAT = 19,
This is merely a flag signifying that the data section contains data
recorded from perf stat record.
HEADER_CACHE = 20,
Description of the cache hierarchy. Based on the Linux sysfs format
in /sys/devices/system/cpu/cpu*/cache/
u32 version Currently always 1
u32 number_of_cache_levels
struct {
u32 level;
u32 line_size;
u32 sets;
u32 ways;
struct perf_header_string type;
struct perf_header_string size;
struct perf_header_string map;
}[number_of_cache_levels];
HEADER_SAMPLE_TIME = 21,
Two uint64_t for the time of first sample and the time of last sample.
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
Attributes
This is an array of perf_event_attrs, each attr_size bytes long, which defines
each event collected. See perf_event.h or the man page for a detailed
description.
Data
This section is the bulk of the file. It consist of a stream of perf_events
describing events. This matches the format generated by the kernel.
See perf_event.h or the manpage for a detailed description.
Some notes on parsing:
Ordering
The events are not necessarily in time stamp order, as they can be
collected in parallel on different CPUs. If the events should be
processed in time order they need to be sorted first. It is possible
to only do a partial sort using the FINISHED_ROUND event header (see
below). perf record guarantees that there is no reordering over a
FINISHED_ROUND.
ID vs IDENTIFIER
When the event stream contains multiple events each event is identified
by an ID. This can be either through the PERF_SAMPLE_ID or the
PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
at a fixed offset from the event header, which allows reliable
parsing of the header. Relying on ID may be ambiguous.
IDENTIFIER is only supported by newer Linux kernels.
Perf record specific events:
In addition to the kernel generated event types perf record adds its
own event types (in addition it also synthesizes some kernel events,
for example MMAP events)
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
struct attr_event {
struct perf_event_header header;
struct perf_event_attr attr;
uint64_t id[];
};
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
#define MAX_EVENT_NAME 64
struct perf_trace_event_type {
uint64_t event_id;
char name[MAX_EVENT_NAME];
};
struct event_type_event {
struct perf_event_header header;
struct perf_trace_event_type event_type;
};
PERF_RECORD_HEADER_TRACING_DATA = 66,
Describe me
struct tracing_data_event {
struct perf_event_header header;
uint32_t size;
};
PERF_RECORD_HEADER_BUILD_ID = 67,
Define a ELF build ID for a referenced executable.
struct build_id_event; /* See above */
PERF_RECORD_FINISHED_ROUND = 68,
No event reordering over this header. No payload.
PERF_RECORD_ID_INDEX = 69,
Map event ids to CPUs and TIDs.
struct id_index_entry {
uint64_t id;
uint64_t idx;
uint64_t cpu;
uint64_t tid;
};
struct id_index_event {
struct perf_event_header header;
uint64_t nr;
struct id_index_entry entries[nr];
};
PERF_RECORD_AUXTRACE_INFO = 70,
Auxtrace type specific information. Describe me
struct auxtrace_info_event {
struct perf_event_header header;
uint32_t type;
uint32_t reserved__; /* For alignment */
uint64_t priv[];
};
PERF_RECORD_AUXTRACE = 71,
Defines auxtrace data. Followed by the actual data. The contents of
the auxtrace data is dependent on the event and the CPU. For example
for Intel Processor Trace it contains Processor Trace data generated
by the CPU.
struct auxtrace_event {
struct perf_event_header header;
uint64_t size;
uint64_t offset;
uint64_t reference;
uint32_t idx;
uint32_t tid;
uint32_t cpu;
uint32_t reserved__; /* For alignment */
};
struct aux_event {
struct perf_event_header header;
uint64_t aux_offset;
uint64_t aux_size;
uint64_t flags;
};
PERF_RECORD_AUXTRACE_ERROR = 72,
Describes an error in hardware tracing
enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_ITRACE = 1,
PERF_AUXTRACE_ERROR_MAX
};
#define MAX_AUXTRACE_ERROR_MSG 64
struct auxtrace_error_event {
struct perf_event_header header;
uint32_t type;
uint32_t code;
uint32_t cpu;
uint32_t pid;
uint32_t tid;
uint32_t reserved__; /* For alignment */
uint64_t ip;
char msg[MAX_AUXTRACE_ERROR_MSG];
};
PERF_RECORD_HEADER_FEATURE = 80,
Describes a header feature. These are records used in pipe-mode that
contain information that otherwise would be in perf.data file's header.
Event types
Define the event attributes with their IDs.
An array bound by the perf_file_section size.
struct {
struct perf_event_attr attr; /* Size defined by header.attr_size */
struct perf_file_section ids;
}
ids points to a array of uint64_t defining the ids for event attr attr.
Pipe-mode data
Pipe-mode avoid seeks in the file by removing the perf_file_section and flags
from the struct perf_header. The trimmed header is:
struct perf_pipe_file_header {
u64 magic;
u64 size;
};
The information about attrs, data, and event_types is instead in the
synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
that are generated by perf record in pipe-mode.
References:
include/uapi/linux/perf_event.h
This is the canonical description of the kernel generated perf_events
and the perf_event_attrs.
perf_events manpage
A manpage describing perf_event and perf_event_attr is here:
http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
This tends to be slightly behind the kernel include, but has better
descriptions. An (typically older) version of the man page may be
included with the standard Linux man pages, available with "man
perf_events"
pmu-tools
https://github.com/andikleen/pmu-tools/tree/master/parser
A definition of the perf.data format in python "construct" format is available
in pmu-tools parser. This allows to read perf.data from python and dump it.
quipper
The quipper C++ parser is available at
http://github.com/google/perf_data_converter/tree/master/src/quipper

49
Documentation/perf.txt Normal file
View file

@ -0,0 +1,49 @@
perf(1)
=======
NAME
----
perf - Performance analysis tools for Linux
SYNOPSIS
--------
[verse]
'perf' [--version] [--help] [OPTIONS] COMMAND [ARGS]
OPTIONS
-------
--debug::
Setup debug variable (see list below) in value
range (0, 10). Use like:
--debug verbose # sets verbose = 1
--debug verbose=2 # sets verbose = 2
List of debug variables allowed to set:
verbose - general debug messages
ordered-events - ordered events object debug messages
data-convert - data convert command debug messages
--buildid-dir::
Setup buildid cache directory. It has higher priority than
buildid.dir config file option.
-v::
--version::
Display perf version.
-h::
--help::
Run perf help command.
DESCRIPTION
-----------
Performance counters for Linux are a new kernel-based subsystem
that provide a framework for all things performance analysis. It
covers hardware level (CPU/PMU, Performance Monitoring Unit) features
and software features (software counters, tracepoints) as well.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1], linkperf:perf-report[1],
linkperf:perf-list[1]

View file

@ -0,0 +1,38 @@
[colors]
# These were the old defaults
top = red, lightgray
medium = green, lightgray
normal = black, lightgray
selected = lightgray, magenta
jump_arrows = blue, lightgray
addr = magenta, lightgray
[tui]
# Defaults if linked with libslang
report = on
annotate = on
top = on
[buildid]
# Default, disable using /dev/null
dir = /root/.debug
[annotate]
# Defaults
hide_src_code = false
use_offset = true
jump_arrows = true
show_nr_jumps = false
[report]
# Defaults
sort-order = comm,dso,symbol
percent-limit = 0
queue-size = 0
children = true
group = true

36
Documentation/tips.txt Normal file
View file

@ -0,0 +1,36 @@
For a higher level overview, try: perf report --sort comm,dso
Sample related events with: perf record -e '{cycles,instructions}:S'
Compare performance results with: perf diff [<old file> <new file>]
Boolean options have negative forms, e.g.: perf report --no-children
Customize output of perf script with: perf script -F event,ip,sym
Generate a script for your data: perf script -g <lang>
Save output of perf stat using: perf stat record <target workload>
Create an archive with symtabs to analyse on other machine: perf archive
Search options using a keyword: perf report -h <keyword>
Use parent filter to see specific call path: perf report -p <regex>
List events using substring match: perf list <keyword>
To see list of saved events and attributes: perf evlist -v
Use --symfs <dir> if your symbol files are in non-standard locations
To see callchains in a more compact form: perf report -g folded
Show individual samples with: perf script
Limit to show entries above 5% only: perf report --percent-limit 5
Profiling branch (mis)predictions with: perf record -b / perf report
Treat branches as callchains: perf report --branch-history
To count events in every 1000 msec: perf stat -I 1000
Print event counts in CSV format with: perf stat -x,
If you have debuginfo enabled, try: perf report -s sym,srcline
For memory address profiling, try: perf mem record / perf mem report
For tracepoint events, try: perf report -s trace_fields
To record callchains for each sample: perf record -g
To record every process run by a user: perf record -u <user>
Skip collecting build-id when recording: perf record -B
To change sampling frequency to 100 Hz: perf record -F 100
See assembly instructions with percentage: perf annotate <symbol>
If you prefer Intel style assembly, try: perf annotate -M intel
For hierarchical output, try: perf report --hierarchy
Order by the overhead of source file name and line number: perf report -s srcline
System-wide collection from all CPUs: perf record -a
Show current config key-value pairs: perf config --list
Show user configuration overrides: perf config --user --list
To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
To report cacheline events from previous recording: perf c2c report

18
MANIFEST Normal file
View file

@ -0,0 +1,18 @@
tools/perf
tools/arch
tools/scripts
tools/build
tools/include
tools/lib/traceevent
tools/lib/api
tools/lib/bpf
tools/lib/subcmd
tools/lib/hweight.c
tools/lib/rbtree.c
tools/lib/string.c
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/lib/find_bit.c
tools/lib/bitmap.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c

112
Makefile Normal file
View file

@ -0,0 +1,112 @@
# SPDX-License-Identifier: GPL-2.0
#
# This is a simple wrapper Makefile that calls the main Makefile.perf
# with a -j option to do parallel builds
#
# If you want to invoke the perf build in some non-standard way then
# you can use the 'make -f Makefile.perf' method to invoke it.
#
#
# Clear out the built-in rules GNU make defines by default (such as .o targets),
# so that we pass through all targets to Makefile.perf:
#
.SUFFIXES:
#
# We don't want to pass along options like -j:
#
unexport MAKEFLAGS
#
# Do a parallel build with multiple jobs, based on the number of CPUs online
# in this system: 'make -j8' on a 8-CPU system, etc.
#
# (To override it, run 'make JOBS=1' and similar.)
#
ifeq ($(JOBS),)
JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
ifeq ($(JOBS),0)
JOBS := 1
endif
endif
#
# Only pass canonical directory names as the output directory:
#
ifneq ($(O),)
FULL_O := $(shell readlink -f $(O) || echo $(O))
endif
#
# Only accept the 'DEBUG' variable from the command line:
#
ifeq ("$(origin DEBUG)", "command line")
ifeq ($(DEBUG),)
override DEBUG = 0
else
SET_DEBUG = "DEBUG=$(DEBUG)"
endif
else
override DEBUG = 0
endif
define print_msg
@printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
endef
define make
@$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
endef
#
# Needed if no target specified:
# (Except for tags and TAGS targets. The reason is that the
# Makefile does not treat tags/TAGS as targets but as files
# and thus won't rebuilt them once they are in place.)
#
all tags TAGS:
$(print_msg)
$(make)
ifdef MAKECMDGOALS
has_clean := 0
ifneq ($(filter clean,$(MAKECMDGOALS)),)
has_clean := 1
endif # clean
ifeq ($(has_clean),1)
rest := $(filter-out clean,$(MAKECMDGOALS))
ifneq ($(rest),)
$(rest): clean
endif # rest
endif # has_clean
endif # MAKECMDGOALS
#
# The clean target is not really parallel, don't print the jobs info:
#
clean:
$(make)
#
# The build-test target is not really parallel, don't print the jobs info,
# it also uses only the tests/make targets that don't pollute the source
# repository, i.e. that uses O= or builds the tarpkg outside the source
# repo directories.
#
# For a full test, use:
#
# make -C tools/perf -f tests/make
#
build-test:
@$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
#
# All other targets get passed through:
#
%: FORCE
$(print_msg)
$(make)
.PHONY: tags TAGS FORCE Makefile

1007
Makefile.config Normal file

File diff suppressed because it is too large Load diff

887
Makefile.perf Normal file
View file

@ -0,0 +1,887 @@
include ../scripts/Makefile.include
# The default target of this Makefile is...
all:
include ../scripts/utilities.mak
# Define V to have a more verbose compile.
#
# Define VF to have a more verbose feature check output.
#
# Define O to save output files in a separate directory.
#
# Define ARCH as name of target architecture if you want cross-builds.
#
# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
#
# Define NO_LIBPERL to disable perl script extension.
#
# Define NO_LIBPYTHON to disable python script extension.
#
# Define PYTHON to point to the python binary if the default
# `python' is not correct; for example: PYTHON=python2
#
# Define PYTHON_CONFIG to point to the python-config binary if
# the default `$(PYTHON)-config' is not correct.
#
# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
#
# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
#
# Define LDFLAGS=-static to build a static binary.
#
# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
#
# Define EXCLUDE_EXTLIBS=-lmylib to exclude libmylib from the auto-generated
# EXTLIBS.
#
# Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS.
#
# Define NO_DWARF if you do not want debug-info analysis feature at all.
#
# Define WERROR=0 to disable treating any warnings as errors.
#
# Define NO_NEWT if you do not want TUI support. (deprecated)
#
# Define NO_SLANG if you do not want TUI support.
#
# Define NO_GTK2 if you do not want GTK+ GUI support.
#
# Define NO_DEMANGLE if you do not want C++ symbol demangling.
#
# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
#
# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
# backtrace post unwind.
#
# Define NO_BACKTRACE if you do not want stack backtrace debug feature
#
# Define NO_LIBNUMA if you do not want numa perf benchmark
#
# Define NO_LIBAUDIT if you do not want libaudit support
#
# Define NO_LIBBIONIC if you do not want bionic support
#
# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
# used for generating build-ids for ELFs generated by jitdump.
#
# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
# for dwarf backtrace post unwind.
#
# Define NO_PERF_READ_VDSO32 if you do not want to build perf-read-vdso32
# for reading the 32-bit compatibility VDSO in 64-bit mode
#
# Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32
# for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode
#
# Define NO_ZLIB if you do not want to support compressed kernel modules
#
# Define NO_LIBBABELTRACE if you do not want libbabeltrace support
# for CTF data format.
#
# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
#
# Define NO_AUXTRACE if you do not want AUX area tracing support
#
# Define NO_LIBBPF if you do not want BPF support
#
# Define NO_SDT if you do not want to define SDT event in perf tools,
# note that it doesn't disable SDT scanning support.
#
# Define FEATURES_DUMP to provide features detection dump file
# and bypass the feature detection
#
# Define NO_JVMTI if you do not want jvmti agent built
#
# Define LIBCLANGLLVM if you DO want builtin clang and llvm support.
# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
# llvm-config is not in $PATH.
# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
LC_COLLATE=C
LC_NUMERIC=C
export LC_COLLATE LC_NUMERIC
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
#$(info Determined 'srctree' to be $(srctree))
endif
ifneq ($(objtree),)
#$(info Determined 'objtree' to be $(objtree))
endif
ifneq ($(OUTPUT),)
#$(info Determined 'OUTPUT' to be $(OUTPUT))
# Adding $(OUTPUT) as a directory to look for source files,
# because use generated output files as sources dependency
# for flex/bison parsers.
VPATH += $(OUTPUT)
export VPATH
endif
ifeq ($(V),1)
Q =
else
Q = @
endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
# because make sets default values, so the simpler ?= approach
# won't work as expected.
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef
LD += $(EXTRA_LDFLAGS)
HOSTCC ?= gcc
HOSTLD ?= ld
HOSTAR ?= ar
PKG_CONFIG = $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
RM = rm -f
LN = ln -f
MKDIR = mkdir
FIND = find
INSTALL = install
FLEX ?= flex
BISON ?= bison
STRIP = strip
AWK = awk
# include Makefile.config by default and rule out
# non-config cases
config := 1
NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
config := 0
endif
endif
# The fixdep build - we force fixdep tool to be built as
# the first target in the separate make session not to be
# disturbed by any parallel make jobs. Once fixdep is done
# we issue the requested build with FIXDEP=1 variable.
#
# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
# targets, because it's not necessary.
ifdef FIXDEP
force_fixdep := 0
else
force_fixdep := $(config)
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
ifeq ($(force_fixdep),1)
goals := $(filter-out all sub-make, $(MAKECMDGOALS))
$(goals) all: sub-make
sub-make: fixdep
@./check-headers.sh
$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
else # force_fixdep
LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
# example, liberty. Select all checkers so we won't get an incomplete feature
# dump file.
ifeq ($(config),1)
ifdef MAKECMDGOALS
ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
FEATURE_TESTS := all
endif
endif
include Makefile.config
endif
ifeq ($(config),0)
include $(srctree)/tools/scripts/Makefile.arch
-include arch/$(SRCARCH)/Makefile
endif
# The FEATURE_DUMP_EXPORT holds location of the actual
# FEATURE_DUMP file to be used to bypass feature detection
# (for bpf or any other subproject)
ifeq ($(FEATURES_DUMP),)
FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
else
FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP))
endif
export prefix bindir sharedir sysconfdir DESTDIR
# sparse is architecture-neutral, which means that we need to tell it
# explicitly what architecture to check for. Fix this up for yours..
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
# Guard against environment variables
PYRF_OBJS =
SCRIPT_SH =
SCRIPT_SH += perf-archive.sh
SCRIPT_SH += perf-with-kcore.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
SUBCMD_PATH=$(OUTPUT)
ifneq ($(subdir),)
API_PATH=$(OUTPUT)/../lib/api/
else
API_PATH=$(OUTPUT)
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
SUBCMD_PATH=$(SUBCMD_DIR)
endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
export LIBTRACEEVENT
LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
#
# The static build has no dynsym table, so this does not work for
# static build. Looks like linker starts to scream about that now
# (in Fedora 26) so we need to switch it off for static build.
DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
LIBAPI = $(API_PATH)libapi.a
export LIBAPI
LIBBPF = $(BPF_PATH)libbpf.a
LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
# python extension build directories
PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
PROGRAMS += $(OUTPUT)perf
ifndef NO_PERF_READ_VDSO32
PROGRAMS += $(OUTPUT)perf-read-vdso32
endif
ifndef NO_PERF_READ_VDSOX32
PROGRAMS += $(OUTPUT)perf-read-vdsox32
endif
LIBJVMTI = libperf-jvmti.so
ifndef NO_JVMTI
PROGRAMS += $(OUTPUT)$(LIBJVMTI)
endif
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
# what 'all' will build but not install in perfexecdir
OTHER_PROGRAMS = $(OUTPUT)perf
# Set paths to tools early so that they can be used for version tests.
ifndef SHELL_PATH
SHELL_PATH = /bin/sh
endif
ifndef PERL_PATH
PERL_PATH = /usr/bin/perl
endif
export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a
PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF)
endif
# We choose to avoid "if .. else if .. else .. endif endif"
# because maintaining the nesting to match is a pain. If
# we had "elif" things would have been much nicer...
ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif
ifndef NO_GTK2
ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
GTK_IN := $(OUTPUT)gtk-in.o
endif
ifdef ASCIIDOC8
export ASCIIDOC8
endif
EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
ifeq ($(USE_CLANG), 1)
CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
endif
ifeq ($(USE_LLVM), 1)
LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs)
LIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM)
endif
ifeq ($(USE_CXX), 1)
LIBS += -lstdc++
endif
export INSTALL SHELL_PATH
### Build rules
SHELL = $(SHELL_PATH)
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
drm_hdr_dir := $(srctree)/tools/include/uapi/drm
drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
# Create output directory if not already present
_dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
$(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl)
$(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@
sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c
sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound
sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
$(sndrv_ctl_ioctl_array): $(sndrv_ctl_hdr_dir)/asound.h $(sndrv_ctl_ioctl_tbl)
$(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(sndrv_ctl_hdr_dir) > $@
sndrv_pcm_ioctl_array := $(beauty_ioctl_outdir)/sndrv_pcm_ioctl_array.c
sndrv_pcm_hdr_dir := $(srctree)/tools/include/uapi/sound
sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
$(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@
kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c
kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/
kcmp_type_tbl := $(srctree)/tools/perf/trace/beauty/kcmp_type.sh
$(kcmp_type_array): $(kcmp_hdr_dir)/kcmp.h $(kcmp_type_tbl)
$(Q)$(SHELL) '$(kcmp_type_tbl)' $(kcmp_hdr_dir) > $@
kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c
kvm_hdr_dir := $(srctree)/tools/include/uapi/linux
kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
$(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl)
$(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@
perf_ioctl_array := $(beauty_ioctl_outdir)/perf_ioctl_array.c
perf_hdr_dir := $(srctree)/tools/include/uapi/linux
perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
$(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
$(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c
arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
$(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
$(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
mkdir -p $(OUTPUT)python && \
cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
please_set_SHELL_PATH_to_a_more_modern_shell:
$(Q)$$(:)
shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
strip: $(PROGRAMS) $(OUTPUT)perf
$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
JEVENTS := $(OUTPUT)pmu-events/jevents
JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
export JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=perf
$(JEVENTS_IN): FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
$(JEVENTS): $(JEVENTS_IN)
$(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
$(PMU_EVENTS_IN): $(JEVENTS) FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
$(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk
$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
$(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
$(SCRIPTS) : % : %.sh
$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
$(Q)touch $(OUTPUT)PERF-VERSION-FILE
# These can record PERF_VERSION
perf.spec $(SCRIPTS) \
: $(OUTPUT)PERF-VERSION-FILE
.SUFFIXES:
#
# If a target does not match any of the later rules then prefix it by $(OUTPUT)
# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
#
ifneq ($(OUTPUT),)
%.o: $(OUTPUT)%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
pmu-events/%.o: $(OUTPUT)pmu-events/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
util/%.o: $(OUTPUT)util/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
bench/%.o: $(OUTPUT)bench/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
tests/%.o: $(OUTPUT)tests/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
endif
# These two need to be here so that when O= is not used they take precedence
# over the general rule for .o
# get relative building directory (to $(OUTPUT))
# and '.' if it's $(OUTPUT) itself
__build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir = $(if $(__build-dir),$(__build-dir),.)
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
$(pkey_alloc_access_rights_array) \
$(sndrv_pcm_ioctl_array) \
$(sndrv_ctl_ioctl_array) \
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
$(arch_errno_name_array)
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.i: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.s: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%-bison.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%-flex.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.o: %.S prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.i: %.S prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)perf-%: %.o $(PERFLIBS)
$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
ifndef NO_PERF_READ_VDSO32
$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c
$(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
endif
ifndef NO_PERF_READ_VDSOX32
$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
endif
ifndef NO_JVMTI
LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
$(LIBJVMTI_IN): FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
$(LIBTRACEEVENT): FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
libtraceevent_plugins: FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
$(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
install-traceevent-plugins: libtraceevent_plugins
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
$(LIBAPI): FORCE
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
$(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBBPF): FORCE
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBSUBCMD): FORCE
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
$(LIBSUBCMD)-clean:
$(call QUIET_CLEAN, libsubcmd)
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
help:
@echo 'Perf make targets:'
@echo ' doc - make *all* documentation (see below)'
@echo ' man - make manpage documentation (access with man <foo>)'
@echo ' html - make html documentation'
@echo ' info - make GNU info documentation (access with info <foo>)'
@echo ' pdf - make pdf documentation'
@echo ' TAGS - use etags to make tag information for source browsing'
@echo ' tags - use ctags to make tag information for source browsing'
@echo ' cscope - use cscope to make interactive browsing database'
@echo ''
@echo 'Perf install targets:'
@echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
@echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
@echo ' path like "make prefix=/usr/local install install-doc"'
@echo ' install - install compiled binaries'
@echo ' install-doc - install *all* documentation'
@echo ' install-man - install manpage documentation'
@echo ' install-html - install html documentation'
@echo ' install-info - install GNU info documentation'
@echo ' install-pdf - install pdf documentation'
@echo ''
@echo ' quick-install-doc - alias for quick-install-man'
@echo ' quick-install-man - install the documentation quickly'
@echo ' quick-install-html - install the html documentation quickly'
@echo ''
@echo 'Perf maintainer targets:'
@echo ' clean - clean all binary objects and build output'
DOC_TARGETS := doc man html info pdf
INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
# 'make doc' should call 'make -C Documentation all'
$(DOC_TARGETS):
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
TAGS:
$(QUIET_GEN)$(RM) TAGS; \
$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)
tags:
$(QUIET_GEN)$(RM) tags; \
$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)
cscope:
$(QUIET_GEN)$(RM) cscope*; \
$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)
### Testing rules
# GNU make supports exporting all variables by "export" without parameters.
# However, the environment gets quite big, and some programs have problems
# with that.
check: $(OUTPUT)common-cmds.h
if sparse; \
then \
for i in *.c */*.c; \
do \
sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
done; \
else \
exit 1; \
fi
### Installation rules
ifndef NO_GTK2
install-gtk: $(OUTPUT)libperf-gtk.so
$(call QUIET_INSTALL, 'GTK UI') \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
$(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
else
install-gtk:
endif
install-tools: all install-gtk
$(call QUIET_INSTALL, binaries) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
ifndef NO_PERF_READ_VDSO32
$(call QUIET_INSTALL, perf-read-vdso32) \
$(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
endif
ifndef NO_PERF_READ_VDSOX32
$(call QUIET_INSTALL, perf-read-vdsox32) \
$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
endif
ifndef NO_JVMTI
$(call QUIET_INSTALL, $(LIBJVMTI)) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-with-kcore) \
$(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifndef NO_LIBAUDIT
$(call QUIET_INSTALL, strace/groups) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
$(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
endif
ifndef NO_LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
endif
ifndef NO_LIBPYTHON
$(call QUIET_INSTALL, python-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
endif
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
$(call QUIET_INSTALL, perf-tip) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
$(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
install-bin: install-tools install-tests install-traceevent-plugins
install: install-bin try-install-man
install-python_ext:
$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
# 'make install-doc' should call 'make -C Documentation install'
$(INSTALL_DOC_TARGETS):
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
### Cleaning rules
#
# This is here, not in Makefile.config, because Makefile.config does
# not get included for the clean target:
#
config-clean:
$(call QUIET_CLEAN, config)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
python-clean:
$(python-clean)
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(drm_ioctl_array) \
$(OUTPUT)$(pkey_alloc_access_rights_array) \
$(OUTPUT)$(sndrv_ctl_ioctl_array) \
$(OUTPUT)$(sndrv_pcm_ioctl_array) \
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
$(OUTPUT)$(arch_errno_name_array)
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
#
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
# file if defined, with no further action.
feature-dump:
ifdef FEATURE_DUMP_COPY
@cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
@echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
else
@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
endif
#
# Trick: if ../../.git does not exist - we are building out of tree for example,
# then force version regeneration:
#
ifeq ($(wildcard ../../.git/HEAD),)
GIT-HEAD-PHONY = ../../.git/HEAD
else
GIT-HEAD-PHONY =
endif
FORCE:
.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
.PHONY: libtraceevent_plugins archheaders
endif # force_fixdep

2
arch/Build Normal file
View file

@ -0,0 +1,2 @@
libperf-y += common.o
libperf-y += $(SRCARCH)/

1
arch/alpha/Build Normal file
View file

@ -0,0 +1 @@
# empty

2
arch/arm/Build Normal file
View file

@ -0,0 +1,2 @@
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/

4
arch/arm/Makefile Normal file
View file

@ -0,0 +1,4 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
PERF_HAVE_JITDUMP := 1

View file

@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <sys/types.h>
#include <regex.h>
struct arm_annotate {
regex_t call_insn,
jump_insn;
};
static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const char *name)
{
struct arm_annotate *arm = arch->priv;
struct ins_ops *ops;
regmatch_t match[2];
if (!regexec(&arm->call_insn, name, 2, match, 0))
ops = &call_ops;
else if (!regexec(&arm->jump_insn, name, 2, match, 0))
ops = &jump_ops;
else
return NULL;
arch__associate_ins_ops(arch, name, ops);
return ops;
}
static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
struct arm_annotate *arm;
int err;
if (arch->initialized)
return 0;
arm = zalloc(sizeof(*arm));
if (!arm)
return -1;
#define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)"
err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED);
if (err)
goto out_free_arm;
err = regcomp(&arm->jump_insn, "^bx?" ARM_CONDS "?$", REG_EXTENDED);
if (err)
goto out_free_call;
#undef ARM_CONDS
arch->initialized = true;
arch->priv = arm;
arch->associate_instruction_ops = arm__associate_instruction_ops;
arch->objdump.comment_char = ';';
arch->objdump.skip_functions_char = '+';
return 0;
out_free_call:
regfree(&arm->call_insn);
out_free_arm:
free(arm);
return -1;
}

View file

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
struct perf_sample;
#endif
extern struct test arch_tests[];
#endif

View file

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifdef DEFINE_DWARF_REGSTR_TABLE
/* This is included in perf/util/dwarf-regs.c */
static const char * const arm_regstr_tbl[] = {
"%r0", "%r1", "%r2", "%r3", "%r4",
"%r5", "%r6", "%r7", "%r8", "%r9", "%r10",
"%fp", "%ip", "%sp", "%lr", "%pc",
};
#endif

View file

@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
static inline const char *perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM_R0:
return "r0";
case PERF_REG_ARM_R1:
return "r1";
case PERF_REG_ARM_R2:
return "r2";
case PERF_REG_ARM_R3:
return "r3";
case PERF_REG_ARM_R4:
return "r4";
case PERF_REG_ARM_R5:
return "r5";
case PERF_REG_ARM_R6:
return "r6";
case PERF_REG_ARM_R7:
return "r7";
case PERF_REG_ARM_R8:
return "r8";
case PERF_REG_ARM_R9:
return "r9";
case PERF_REG_ARM_R10:
return "r10";
case PERF_REG_ARM_FP:
return "fp";
case PERF_REG_ARM_IP:
return "ip";
case PERF_REG_ARM_SP:
return "sp";
case PERF_REG_ARM_LR:
return "lr";
case PERF_REG_ARM_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

4
arch/arm/tests/Build Normal file
View file

@ -0,0 +1,4 @@
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
libperf-y += arch-tests.o

View file

@ -0,0 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "DWARF unwind",
.func = test__dwarf_unwind,
},
#endif
{
.func = NULL,
},
};

View file

@ -0,0 +1,62 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "event.h"
#include "debug.h"
#include "tests/tests.h"
#define STACK_SIZE 8192
static int sample_ustack(struct perf_sample *sample,
struct thread *thread, u64 *regs)
{
struct stack_dump *stack = &sample->user_stack;
struct map *map;
unsigned long sp;
u64 stack_size, *buf;
buf = malloc(STACK_SIZE);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
sp = (unsigned long) regs[PERF_REG_ARM_SP];
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}
stack_size = map->end - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
memcpy(buf, (void *) sp, stack_size);
stack->data = (char *) buf;
stack->size = stack_size;
return 0;
}
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
u64 *buf;
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
regs->mask = PERF_REGS_MASK;
return sample_ustack(sample, thread, buf);
}

View file

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#define R0 0x00
#define R1 0x08
#define R2 0x10
#define R3 0x18
#define R4 0x20
#define R5 0x28
#define R6 0x30
#define R7 0x38
#define R8 0x40
#define R9 0x48
#define SL 0x50
#define FP 0x58
#define IP 0x60
#define SP 0x68
#define LR 0x70
#define PC 0x78
/*
* Implementation of void perf_regs_load(u64 *regs);
*
* This functions fills in the 'regs' buffer from the actual registers values,
* in the way the perf built-in unwinding test expects them:
* - the PC at the time at the call to this function. Since this function
* is called using a bl instruction, the PC value is taken from LR.
* The built-in unwinding test then unwinds the call stack from the dwarf
* information in unwind__get_entries.
*
* Notes:
* - the 8 bytes stride in the registers offsets comes from the fact
* that the registers are stored in an u64 array (u64 *regs),
* - the regs buffer needs to be zeroed before the call to this function,
* in this case using a calloc in dwarf-unwind.c.
*/
.text
.type perf_regs_load,%function
ENTRY(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
str r3, [r0, #R3]
str r4, [r0, #R4]
str r5, [r0, #R5]
str r6, [r0, #R6]
str r7, [r0, #R7]
str r8, [r0, #R8]
str r9, [r0, #R9]
str sl, [r0, #SL]
str fp, [r0, #FP]
str ip, [r0, #IP]
str sp, [r0, #SP]
str lr, [r0, #LR]
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
ENDPROC(perf_regs_load)

6
arch/arm/util/Build Normal file
View file

@ -0,0 +1,6 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o

108
arch/arm/util/auxtrace.c Normal file
View file

@ -0,0 +1,108 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
#include <stdbool.h>
#include <linux/coresight-pmu.h>
#include "../../util/auxtrace.h"
#include "../../util/evlist.h"
#include "../../util/pmu.h"
#include "cs-etm.h"
#include "arm-spe.h"
static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
{
struct perf_pmu **arm_spe_pmus = NULL;
int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
/* arm_spe_xxxxxxxxx\0 */
char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
if (!arm_spe_pmus) {
pr_err("spes alloc failed\n");
*err = -ENOMEM;
return NULL;
}
for (i = 0; i < nr_cpus; i++) {
ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i);
if (ret < 0) {
pr_err("sprintf failed\n");
*err = -ENOMEM;
return NULL;
}
arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
if (arm_spe_pmus[*nr_spes]) {
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
__func__, __LINE__, *nr_spes,
arm_spe_pmus[*nr_spes]->type,
arm_spe_pmus[*nr_spes]->name);
(*nr_spes)++;
}
}
return arm_spe_pmus;
}
struct auxtrace_record
*auxtrace_record__init(struct perf_evlist *evlist, int *err)
{
struct perf_pmu *cs_etm_pmu;
struct perf_evsel *evsel;
bool found_etm = false;
bool found_spe = false;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
int i = 0;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
if (!arm_spe_pmus)
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
evsel->attr.type == cs_etm_pmu->type)
found_etm = true;
if (!nr_spes)
continue;
for (i = 0; i < nr_spes; i++) {
if (evsel->attr.type == arm_spe_pmus[i]->type) {
found_spe = true;
break;
}
}
}
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
*err = -EOPNOTSUPP;
return NULL;
}
if (found_etm)
return cs_etm_record_init(err);
#if defined(__aarch64__)
if (found_spe)
return arm_spe_recording_init(err, arm_spe_pmus[i]);
#endif
/*
* Clear 'err' even if we haven't found an event - that way perf
* record can still be used even if tracers aren't present. The NULL
* return value will take care of telling the infrastructure HW tracing
* isn't available.
*/
*err = 0;
return NULL;
}

651
arch/arm/util/cs-etm.c Normal file
View file

@ -0,0 +1,651 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
#include <api/fs/fs.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/coresight-pmu.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include "cs-etm.h"
#include "../../perf.h"
#include "../../util/auxtrace.h"
#include "../../util/cpumap.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/pmu.h"
#include "../../util/thread_map.h"
#include "../../util/cs-etm.h"
#include <stdlib.h>
#include <sys/stat.h>
#define ENABLE_SINK_MAX 128
#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
struct cs_etm_recording {
struct auxtrace_record itr;
struct perf_pmu *cs_etm_pmu;
struct perf_evlist *evlist;
bool snapshot_mode;
size_t snapshot_size;
};
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
struct record_opts *opts,
const char *str)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
unsigned long long snapshot_size = 0;
char *endptr;
if (str) {
snapshot_size = strtoull(str, &endptr, 0);
if (*endptr || snapshot_size > SIZE_MAX)
return -1;
}
opts->auxtrace_snapshot_mode = true;
opts->auxtrace_snapshot_size = snapshot_size;
ptr->snapshot_size = snapshot_size;
return 0;
}
static int cs_etm_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
struct perf_evsel *evsel, *cs_etm_evsel = NULL;
const struct cpu_map *cpus = evlist->cpus;
bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == cs_etm_pmu->type) {
if (cs_etm_evsel) {
pr_err("There may be only one %s event\n",
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
evsel->attr.freq = 0;
evsel->attr.sample_period = 1;
cs_etm_evsel = evsel;
opts->full_auxtrace = true;
}
}
/* no need to continue if at least one event of interest was found */
if (!cs_etm_evsel)
return 0;
if (opts->use_clockid) {
pr_err("Cannot use clockid (-k option) with %s\n",
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
/* we are in snapshot mode */
if (opts->auxtrace_snapshot_mode) {
/*
* No size were given to '-S' or '-m,', so go with
* the default
*/
if (!opts->auxtrace_snapshot_size &&
!opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages =
KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
} else if (!opts->auxtrace_mmap_pages && !privileged &&
opts->mmap_pages == UINT_MAX) {
opts->mmap_pages = KiB(256) / page_size;
}
/*
* '-m,xyz' was specified but no snapshot size, so make the
* snapshot size as big as the auxtrace mmap area.
*/
if (!opts->auxtrace_snapshot_size) {
opts->auxtrace_snapshot_size =
opts->auxtrace_mmap_pages * (size_t)page_size;
}
/*
* -Sxyz was specified but no auxtrace mmap area, so make the
* auxtrace mmap area big enough to fit the requested snapshot
* size.
*/
if (!opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_snapshot_size;
sz = round_up(sz, page_size) / page_size;
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
/* Snapshost size can't be bigger than the auxtrace area */
if (opts->auxtrace_snapshot_size >
opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
opts->auxtrace_snapshot_size,
opts->auxtrace_mmap_pages * (size_t)page_size);
return -EINVAL;
}
/* Something went wrong somewhere - this shouldn't happen */
if (!opts->auxtrace_snapshot_size ||
!opts->auxtrace_mmap_pages) {
pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
return -EINVAL;
}
}
/* We are in full trace mode but '-m,xyz' wasn't specified */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
}
/* Validate auxtrace_mmap_pages provided by user */
if (opts->auxtrace_mmap_pages) {
unsigned int max_page = (KiB(128) / page_size);
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
if (!privileged &&
opts->auxtrace_mmap_pages > max_page) {
opts->auxtrace_mmap_pages = max_page;
pr_err("auxtrace too big, truncating to %d\n",
max_page);
}
if (!is_power_of_2(sz)) {
pr_err("Invalid mmap size for %s: must be a power of 2\n",
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
}
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
* event must come first.
*/
perf_evlist__to_front(evlist, cs_etm_evsel);
/*
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
if (!cpu_map__empty(cpus))
perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
struct perf_evsel *tracking_evsel;
int err;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
if (!cpu_map__empty(cpus))
perf_evsel__set_sample_bit(tracking_evsel, TIME);
}
return 0;
}
static u64 cs_etm_get_config(struct auxtrace_record *itr)
{
u64 config = 0;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
struct perf_evlist *evlist = ptr->evlist;
struct perf_evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == cs_etm_pmu->type) {
/*
* Variable perf_event_attr::config is assigned to
* ETMv3/PTM. The bit fields have been made to match
* the ETMv3.5 ETRMCR register specification. See the
* PMU_FORMAT_ATTR() declarations in
* drivers/hwtracing/coresight/coresight-perf.c for
* details.
*/
config = evsel->attr.config;
break;
}
}
return config;
}
#ifndef BIT
#define BIT(N) (1UL << (N))
#endif
static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
{
u64 config = 0;
u64 config_opts = 0;
/*
* The perf event variable config bits represent both
* the command line options and register programming
* bits in ETMv3/PTM. For ETMv4 we must remap options
* to real bits
*/
config_opts = cs_etm_get_config(itr);
if (config_opts & BIT(ETM_OPT_CYCACC))
config |= BIT(ETM4_CFG_BIT_CYCACC);
if (config_opts & BIT(ETM_OPT_TS))
config |= BIT(ETM4_CFG_BIT_TS);
if (config_opts & BIT(ETM_OPT_RETSTK))
config |= BIT(ETM4_CFG_BIT_RETSTK);
return config;
}
static size_t
cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
int i;
int etmv3 = 0, etmv4 = 0;
struct cpu_map *event_cpus = evlist->cpus;
struct cpu_map *online_cpus = cpu_map__new(NULL);
/* cpu map is not empty, we have specific CPUs to work with */
if (!cpu_map__empty(event_cpus)) {
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
continue;
if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
etmv3++;
}
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(online_cpus, i))
continue;
if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
etmv3++;
}
}
cpu_map__put(online_cpus);
return (CS_ETM_HEADER_SIZE +
(etmv4 * CS_ETMV4_PRIV_SIZE) +
(etmv3 * CS_ETMV3_PRIV_SIZE));
}
static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
[CS_ETM_ETMCCER] = "mgmt/etmccer",
[CS_ETM_ETMIDR] = "mgmt/etmidr",
};
static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
[CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
[CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
[CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
[CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
[CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
};
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
{
bool ret = false;
char path[PATH_MAX];
int scan;
unsigned int val;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
/* Take any of the RO files for ETMv4 and see if it present */
snprintf(path, PATH_MAX, "cpu%d/%s",
cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
/* The file was read successfully, we have a winner */
if (scan == 1)
ret = true;
return ret;
}
static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
{
char pmu_path[PATH_MAX];
int scan;
unsigned int val = 0;
/* Get RO metadata from sysfs */
snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val);
if (scan != 1)
pr_err("%s: error reading: %s\n", __func__, pmu_path);
return val;
}
static void cs_etm_get_metadata(int cpu, u32 *offset,
struct auxtrace_record *itr,
struct auxtrace_info_event *info)
{
u32 increment;
u64 magic;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
/* first see what kind of tracer this cpu is affined to */
if (cs_etm_is_etmv4(itr, cpu)) {
magic = __perf_cs_etmv4_magic;
/* Get trace configuration register */
info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
cs_etmv4_get_config(itr);
/* Get traceID from the framework */
info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
coresight_get_trace_id(cpu);
/* Get read-only information from sysFS */
info->priv[*offset + CS_ETMV4_TRCIDR0] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
info->priv[*offset + CS_ETMV4_TRCIDR1] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
info->priv[*offset + CS_ETMV4_TRCIDR2] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
info->priv[*offset + CS_ETMV4_TRCIDR8] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro
[CS_ETMV4_TRCAUTHSTATUS]);
/* How much space was used */
increment = CS_ETMV4_PRIV_MAX;
} else {
magic = __perf_cs_etmv3_magic;
/* Get configuration register */
info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
/* Get traceID from the framework */
info->priv[*offset + CS_ETM_ETMTRACEIDR] =
coresight_get_trace_id(cpu);
/* Get read-only information from sysFS */
info->priv[*offset + CS_ETM_ETMCCER] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv3_ro[CS_ETM_ETMCCER]);
info->priv[*offset + CS_ETM_ETMIDR] =
cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv3_ro[CS_ETM_ETMIDR]);
/* How much space was used */
increment = CS_ETM_PRIV_MAX;
}
/* Build generic header portion */
info->priv[*offset + CS_ETM_MAGIC] = magic;
info->priv[*offset + CS_ETM_CPU] = cpu;
/* Where the next CPU entry should start from */
*offset += increment;
}
static int cs_etm_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *info,
size_t priv_size)
{
int i;
u32 offset;
u64 nr_cpu, type;
struct cpu_map *cpu_map;
struct cpu_map *event_cpus = session->evlist->cpus;
struct cpu_map *online_cpus = cpu_map__new(NULL);
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
return -EINVAL;
if (!session->evlist->nr_mmaps)
return -EINVAL;
/* If the cpu_map is empty all online CPUs are involved */
if (cpu_map__empty(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < cpu_map__nr(event_cpus); i++) {
if (cpu_map__has(event_cpus, i) &&
!cpu_map__has(online_cpus, i))
return -EINVAL;
}
cpu_map = event_cpus;
}
nr_cpu = cpu_map__nr(cpu_map);
/* Get PMU type as dynamically assigned by the core */
type = cs_etm_pmu->type;
/* First fill out the session header */
info->type = PERF_AUXTRACE_CS_ETM;
info->priv[CS_HEADER_VERSION_0] = 0;
info->priv[CS_PMU_TYPE_CPUS] = type << 32;
info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
offset = CS_ETM_SNAPSHOT + 1;
for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
if (cpu_map__has(cpu_map, i))
cs_etm_get_metadata(i, &offset, itr, info);
cpu_map__put(online_cpus);
return 0;
}
static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
int idx, struct auxtrace_mmap *mm,
unsigned char *data __maybe_unused,
u64 *head, u64 *old)
{
pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
__func__, idx, (size_t)*old, (size_t)*head, mm->len);
*old = *head;
*head += mm->len;
return 0;
}
static int cs_etm_snapshot_start(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->cs_etm_pmu->type)
return perf_evsel__disable(evsel);
}
return -EINVAL;
}
static int cs_etm_snapshot_finish(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->cs_etm_pmu->type)
return perf_evsel__enable(evsel);
}
return -EINVAL;
}
static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused)
{
return (((u64) rand() << 0) & 0x00000000FFFFFFFFull) |
(((u64) rand() << 32) & 0xFFFFFFFF00000000ull);
}
static void cs_etm_recording_free(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
free(ptr);
}
static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->cs_etm_pmu->type)
return perf_evlist__enable_event_idx(ptr->evlist,
evsel, idx);
}
return -EINVAL;
}
struct auxtrace_record *cs_etm_record_init(int *err)
{
struct perf_pmu *cs_etm_pmu;
struct cs_etm_recording *ptr;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
if (!cs_etm_pmu) {
*err = -EINVAL;
goto out;
}
ptr = zalloc(sizeof(struct cs_etm_recording));
if (!ptr) {
*err = -ENOMEM;
goto out;
}
ptr->cs_etm_pmu = cs_etm_pmu;
ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
ptr->itr.info_fill = cs_etm_info_fill;
ptr->itr.find_snapshot = cs_etm_find_snapshot;
ptr->itr.snapshot_start = cs_etm_snapshot_start;
ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
ptr->itr.reference = cs_etm_reference;
ptr->itr.free = cs_etm_recording_free;
ptr->itr.read_finish = cs_etm_read_finish;
*err = 0;
return &ptr->itr;
out:
return NULL;
}
static FILE *cs_device__open_file(const char *name)
{
struct stat st;
char path[PATH_MAX];
const char *sysfs;
sysfs = sysfs__mountpoint();
if (!sysfs)
return NULL;
snprintf(path, PATH_MAX,
"%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
if (stat(path, &st) < 0)
return NULL;
return fopen(path, "w");
}
static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
{
va_list args;
FILE *file;
int ret = -EINVAL;
va_start(args, fmt);
file = cs_device__open_file(name);
if (file) {
ret = vfprintf(file, fmt, args);
fclose(file);
}
va_end(args);
return ret;
}
int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
{
int ret;
char enable_sink[ENABLE_SINK_MAX];
snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
term->val.drv_cfg, "enable_sink");
ret = cs_device__print_file(enable_sink, "%d", 1);
if (ret < 0)
return ret;
return 0;
}

15
arch/arm/util/cs-etm.h Normal file
View file

@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
#ifndef INCLUDE__PERF_CS_ETM_H__
#define INCLUDE__PERF_CS_ETM_H__
#include "../../util/evsel.h"
struct auxtrace_record *cs_etm_record_init(int *err);
int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
#endif

View file

@ -0,0 +1,64 @@
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2010 Will Deacon, ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <stddef.h>
#include <linux/stringify.h>
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = __stringify(%r##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
/*
* Reference:
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf
*/
static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
GPR_DWARFNUM_NAME(0),
GPR_DWARFNUM_NAME(1),
GPR_DWARFNUM_NAME(2),
GPR_DWARFNUM_NAME(3),
GPR_DWARFNUM_NAME(4),
GPR_DWARFNUM_NAME(5),
GPR_DWARFNUM_NAME(6),
GPR_DWARFNUM_NAME(7),
GPR_DWARFNUM_NAME(8),
GPR_DWARFNUM_NAME(9),
GPR_DWARFNUM_NAME(10),
REG_DWARFNUM_NAME("%fp", 11),
REG_DWARFNUM_NAME("%ip", 12),
REG_DWARFNUM_NAME("%sp", 13),
REG_DWARFNUM_NAME("%lr", 14),
REG_DWARFNUM_NAME("%pc", 15),
REG_DWARFNUM_END,
};
/**
* get_arch_regstr() - lookup register name from it's DWARF register number
* @n: the DWARF register number
*
* get_arch_regstr() returns the name of the register in struct
* regdwarfnum_table from it's DWARF register number. If the register is not
* found in the table, this returns NULL;
*/
const char *get_arch_regstr(unsigned int n)
{
const struct pt_regs_dwarfnum *roff;
for (roff = regdwarfnum_table; roff->name != NULL; roff++)
if (roff->dwarfnum == n)
return roff->name;
return NULL;
}

31
arch/arm/util/pmu.c Normal file
View file

@ -0,0 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
#include <string.h>
#include <linux/coresight-pmu.h>
#include <linux/perf_event.h>
#include "cs-etm.h"
#include "arm-spe.h"
#include "../../util/pmu.h"
struct perf_event_attr
*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->selectable = true;
pmu->set_drv_config = cs_etm_set_drv_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
return arm_spe_pmu_default_config(pmu);
#endif
}
#endif
return NULL;
}

View file

@ -0,0 +1,38 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(FP);
dwarf_regs[12] = REG(IP);
dwarf_regs[13] = REG(SP);
dwarf_regs[14] = REG(LR);
dwarf_regs[15] = REG(PC);
return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
dwarf_regs);
}

View file

@ -0,0 +1,50 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_ARM_R0:
return PERF_REG_ARM_R0;
case UNW_ARM_R1:
return PERF_REG_ARM_R1;
case UNW_ARM_R2:
return PERF_REG_ARM_R2;
case UNW_ARM_R3:
return PERF_REG_ARM_R3;
case UNW_ARM_R4:
return PERF_REG_ARM_R4;
case UNW_ARM_R5:
return PERF_REG_ARM_R5;
case UNW_ARM_R6:
return PERF_REG_ARM_R6;
case UNW_ARM_R7:
return PERF_REG_ARM_R7;
case UNW_ARM_R8:
return PERF_REG_ARM_R8;
case UNW_ARM_R9:
return PERF_REG_ARM_R9;
case UNW_ARM_R10:
return PERF_REG_ARM_R10;
case UNW_ARM_R11:
return PERF_REG_ARM_FP;
case UNW_ARM_R12:
return PERF_REG_ARM_IP;
case UNW_ARM_R13:
return PERF_REG_ARM_SP;
case UNW_ARM_R14:
return PERF_REG_ARM_LR;
case UNW_ARM_R15:
return PERF_REG_ARM_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}

2
arch/arm64/Build Normal file
View file

@ -0,0 +1,2 @@
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/

6
arch/arm64/Makefile Normal file
View file

@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
PERF_HAVE_JITDUMP := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1

View file

@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <sys/types.h>
#include <regex.h>
struct arm64_annotate {
regex_t call_insn,
jump_insn;
};
static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
{
struct arm64_annotate *arm = arch->priv;
struct ins_ops *ops;
regmatch_t match[2];
if (!regexec(&arm->jump_insn, name, 2, match, 0))
ops = &jump_ops;
else if (!regexec(&arm->call_insn, name, 2, match, 0))
ops = &call_ops;
else if (!strcmp(name, "ret"))
ops = &ret_ops;
else
return NULL;
arch__associate_ins_ops(arch, name, ops);
return ops;
}
static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
struct arm64_annotate *arm;
int err;
if (arch->initialized)
return 0;
arm = zalloc(sizeof(*arm));
if (!arm)
return -1;
/* bl, blr */
err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
if (err)
goto out_free_arm;
/* b, b.cond, br, cbz/cbnz, tbz/tbnz */
err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl)?n?z?$",
REG_EXTENDED);
if (err)
goto out_free_call;
arch->initialized = true;
arch->priv = arm;
arch->associate_instruction_ops = arm64__associate_instruction_ops;
arch->objdump.comment_char = '/';
arch->objdump.skip_functions_char = '+';
return 0;
out_free_call:
regfree(&arm->call_insn);
out_free_arm:
free(arm);
return -1;
}

View file

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
struct perf_sample;
#endif
extern struct test arch_tests[];
#endif

View file

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifdef DEFINE_DWARF_REGSTR_TABLE
/* This is included in perf/util/dwarf-regs.c */
static const char * const aarch64_regstr_tbl[] = {
"%x0", "%x1", "%x2", "%x3", "%x4",
"%x5", "%x6", "%x7", "%x8", "%x9",
"%x10", "%x11", "%x12", "%x13", "%x14",
"%x15", "%x16", "%x17", "%x18", "%x19",
"%x20", "%x21", "%x22", "%x23", "%x24",
"%x25", "%x26", "%x27", "%x28", "%x29",
"%lr", "%sp",
};
#endif

View file

@ -0,0 +1,94 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM64_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
static inline const char *perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
return "x2";
case PERF_REG_ARM64_X3:
return "x3";
case PERF_REG_ARM64_X4:
return "x4";
case PERF_REG_ARM64_X5:
return "x5";
case PERF_REG_ARM64_X6:
return "x6";
case PERF_REG_ARM64_X7:
return "x7";
case PERF_REG_ARM64_X8:
return "x8";
case PERF_REG_ARM64_X9:
return "x9";
case PERF_REG_ARM64_X10:
return "x10";
case PERF_REG_ARM64_X11:
return "x11";
case PERF_REG_ARM64_X12:
return "x12";
case PERF_REG_ARM64_X13:
return "x13";
case PERF_REG_ARM64_X14:
return "x14";
case PERF_REG_ARM64_X15:
return "x15";
case PERF_REG_ARM64_X16:
return "x16";
case PERF_REG_ARM64_X17:
return "x17";
case PERF_REG_ARM64_X18:
return "x18";
case PERF_REG_ARM64_X19:
return "x19";
case PERF_REG_ARM64_X20:
return "x20";
case PERF_REG_ARM64_X21:
return "x21";
case PERF_REG_ARM64_X22:
return "x22";
case PERF_REG_ARM64_X23:
return "x23";
case PERF_REG_ARM64_X24:
return "x24";
case PERF_REG_ARM64_X25:
return "x25";
case PERF_REG_ARM64_X26:
return "x26";
case PERF_REG_ARM64_X27:
return "x27";
case PERF_REG_ARM64_X28:
return "x28";
case PERF_REG_ARM64_X29:
return "x29";
case PERF_REG_ARM64_SP:
return "sp";
case PERF_REG_ARM64_LR:
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

4
arch/arm64/tests/Build Normal file
View file

@ -0,0 +1,4 @@
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
libperf-y += arch-tests.o

View file

@ -0,0 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "DWARF unwind",
.func = test__dwarf_unwind,
},
#endif
{
.func = NULL,
},
};

View file

@ -0,0 +1,62 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "event.h"
#include "debug.h"
#include "tests/tests.h"
#define STACK_SIZE 8192
static int sample_ustack(struct perf_sample *sample,
struct thread *thread, u64 *regs)
{
struct stack_dump *stack = &sample->user_stack;
struct map *map;
unsigned long sp;
u64 stack_size, *buf;
buf = malloc(STACK_SIZE);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
sp = (unsigned long) regs[PERF_REG_ARM64_SP];
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}
stack_size = map->end - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
memcpy(buf, (void *) sp, stack_size);
stack->data = (char *) buf;
stack->size = stack_size;
return 0;
}
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
u64 *buf;
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
regs->mask = PERF_REGS_MASK;
return sample_ustack(sample, thread, buf);
}

View file

@ -0,0 +1,47 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
.text
.type perf_regs_load,%function
#define STR_REG(r) str x##r, [x0, 8 * r]
#define LDR_REG(r) ldr x##r, [x0, 8 * r]
#define SP (8 * 31)
#define PC (8 * 32)
ENTRY(perf_regs_load)
STR_REG(0)
STR_REG(1)
STR_REG(2)
STR_REG(3)
STR_REG(4)
STR_REG(5)
STR_REG(6)
STR_REG(7)
STR_REG(8)
STR_REG(9)
STR_REG(10)
STR_REG(11)
STR_REG(12)
STR_REG(13)
STR_REG(14)
STR_REG(15)
STR_REG(16)
STR_REG(17)
STR_REG(18)
STR_REG(19)
STR_REG(20)
STR_REG(21)
STR_REG(22)
STR_REG(23)
STR_REG(24)
STR_REG(25)
STR_REG(26)
STR_REG(27)
STR_REG(28)
STR_REG(29)
STR_REG(30)
mov x1, sp
str x1, [x0, #SP]
str x30, [x0, #PC]
LDR_REG(1)
ret
ENDPROC(perf_regs_load)

10
arch/arm64/util/Build Normal file
View file

@ -0,0 +1,10 @@
libperf-y += header.o
libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o \
arm-spe.o

225
arch/arm64/util/arm-spe.c Normal file
View file

@ -0,0 +1,225 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <time.h>
#include "../../util/cpumap.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
#include "../../util/session.h"
#include "../../util/util.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
struct arm_spe_recording {
struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu;
struct perf_evlist *evlist;
};
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
return ARM_SPE_AUXTRACE_PRIV_SIZE;
}
static int arm_spe_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *auxtrace_info,
size_t priv_size)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
return -EINVAL;
if (!session->evlist->nr_mmaps)
return -EINVAL;
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
return 0;
}
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct perf_evsel *evsel, *arm_spe_evsel = NULL;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
struct perf_evsel *tracking_evsel;
int err;
sper->evlist = evlist;
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == arm_spe_pmu->type) {
if (arm_spe_evsel) {
pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
return -EINVAL;
}
evsel->attr.freq = 0;
evsel->attr.sample_period = 1;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
}
if (!opts->full_auxtrace)
return 0;
/* We are in full trace mode but '-m,xyz' wasn't specified */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
}
/* Validate auxtrace_mmap_pages */
if (opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz = KiB(8);
if (sz < min_sz || !is_power_of_2(sz)) {
pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
min_sz / 1024);
return -EINVAL;
}
}
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first.
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
perf_evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1;
perf_evsel__set_sample_bit(tracking_evsel, TIME);
perf_evsel__set_sample_bit(tracking_evsel, CPU);
perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return ts.tv_sec ^ ts.tv_nsec;
}
static void arm_spe_recording_free(struct auxtrace_record *itr)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
free(sper);
}
static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(sper->evlist, evsel) {
if (evsel->attr.type == sper->arm_spe_pmu->type)
return perf_evlist__enable_event_idx(sper->evlist,
evsel, idx);
}
return -EINVAL;
}
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
struct arm_spe_recording *sper;
if (!arm_spe_pmu) {
*err = -ENODEV;
return NULL;
}
sper = zalloc(sizeof(struct arm_spe_recording));
if (!sper) {
*err = -ENOMEM;
return NULL;
}
sper->arm_spe_pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
sper->itr.read_finish = arm_spe_read_finish;
sper->itr.alignment = 0;
return &sper->itr;
}
struct perf_event_attr
*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
{
struct perf_event_attr *attr;
attr = zalloc(sizeof(struct perf_event_attr));
if (!attr) {
pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
return NULL;
}
/*
* If kernel driver doesn't advertise a minimum,
* use max allowable by PMSIDR_EL1.INTERVAL
*/
if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
&attr->sample_period) != 1) {
pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
attr->sample_period = 4096;
}
arm_spe_pmu->selectable = true;
arm_spe_pmu->is_uncore = false;
return attr;
}

View file

@ -0,0 +1,96 @@
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2010 Will Deacon, ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <dwarf-regs.h>
#include <linux/ptrace.h> /* for struct user_pt_regs */
#include <linux/stringify.h>
#include "util.h"
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = __stringify(%x##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
#define DWARFNUM2OFFSET(index) \
(index * sizeof((struct user_pt_regs *)0)->regs[0])
/*
* Reference:
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
*/
static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
GPR_DWARFNUM_NAME(0),
GPR_DWARFNUM_NAME(1),
GPR_DWARFNUM_NAME(2),
GPR_DWARFNUM_NAME(3),
GPR_DWARFNUM_NAME(4),
GPR_DWARFNUM_NAME(5),
GPR_DWARFNUM_NAME(6),
GPR_DWARFNUM_NAME(7),
GPR_DWARFNUM_NAME(8),
GPR_DWARFNUM_NAME(9),
GPR_DWARFNUM_NAME(10),
GPR_DWARFNUM_NAME(11),
GPR_DWARFNUM_NAME(12),
GPR_DWARFNUM_NAME(13),
GPR_DWARFNUM_NAME(14),
GPR_DWARFNUM_NAME(15),
GPR_DWARFNUM_NAME(16),
GPR_DWARFNUM_NAME(17),
GPR_DWARFNUM_NAME(18),
GPR_DWARFNUM_NAME(19),
GPR_DWARFNUM_NAME(20),
GPR_DWARFNUM_NAME(21),
GPR_DWARFNUM_NAME(22),
GPR_DWARFNUM_NAME(23),
GPR_DWARFNUM_NAME(24),
GPR_DWARFNUM_NAME(25),
GPR_DWARFNUM_NAME(26),
GPR_DWARFNUM_NAME(27),
GPR_DWARFNUM_NAME(28),
GPR_DWARFNUM_NAME(29),
REG_DWARFNUM_NAME("%lr", 30),
REG_DWARFNUM_NAME("%sp", 31),
REG_DWARFNUM_END,
};
/**
* get_arch_regstr() - lookup register name from it's DWARF register number
* @n: the DWARF register number
*
* get_arch_regstr() returns the name of the register in struct
* regdwarfnum_table from it's DWARF register number. If the register is not
* found in the table, this returns NULL;
*/
const char *get_arch_regstr(unsigned int n)
{
const struct pt_regs_dwarfnum *roff;
for (roff = regdwarfnum_table; roff->name != NULL; roff++)
if (roff->dwarfnum == n)
return roff->name;
return NULL;
}
int regs_query_register_offset(const char *name)
{
const struct pt_regs_dwarfnum *roff;
for (roff = regdwarfnum_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return DWARFNUM2OFFSET(roff->dwarfnum);
return -EINVAL;
}

65
arch/arm64/util/header.c Normal file
View file

@ -0,0 +1,65 @@
#include <stdio.h>
#include <stdlib.h>
#include <api/fs/fs.h>
#include "header.h"
#define MIDR "/regs/identification/midr_el1"
#define MIDR_SIZE 19
#define MIDR_REVISION_MASK 0xf
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
char *get_cpuid_str(struct perf_pmu *pmu)
{
char *buf = NULL;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
int cpu;
u64 midr = 0;
struct cpu_map *cpus;
FILE *file;
if (!sysfs || !pmu || !pmu->cpus)
return NULL;
buf = malloc(MIDR_SIZE);
if (!buf)
return NULL;
/* read midr from list of cpus mapped to this pmu */
cpus = cpu_map__get(pmu->cpus);
for (cpu = 0; cpu < cpus->nr; cpu++) {
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
sysfs, cpus->map[cpu]);
file = fopen(path, "r");
if (!file) {
pr_debug("fopen failed for file %s\n", path);
continue;
}
if (!fgets(buf, MIDR_SIZE, file)) {
fclose(file);
continue;
}
fclose(file);
/* Ignore/clear Variant[23:20] and
* Revision[3:0] of MIDR
*/
midr = strtoul(buf, NULL, 16);
midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
/* got midr break loop */
break;
}
if (!midr) {
pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
free(buf);
buf = NULL;
}
cpu_map__put(cpus);
return buf;
}

View file

@ -0,0 +1,22 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* Copyright (C) 2015 Naveen N. Rao, IBM Corporation
*/
#include "debug.h"
#include "symbol.h"
#include "map.h"
#include "probe-event.h"
#include "probe-file.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
return ehdr.e_type == ET_EXEC ||
ehdr.e_type == ET_REL ||
ehdr.e_type == ET_DYN;
}
#endif

View file

@ -0,0 +1,60 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX], dwarf_pc;
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r); \
val; \
})
dwarf_regs[0] = REG(X0);
dwarf_regs[1] = REG(X1);
dwarf_regs[2] = REG(X2);
dwarf_regs[3] = REG(X3);
dwarf_regs[4] = REG(X4);
dwarf_regs[5] = REG(X5);
dwarf_regs[6] = REG(X6);
dwarf_regs[7] = REG(X7);
dwarf_regs[8] = REG(X8);
dwarf_regs[9] = REG(X9);
dwarf_regs[10] = REG(X10);
dwarf_regs[11] = REG(X11);
dwarf_regs[12] = REG(X12);
dwarf_regs[13] = REG(X13);
dwarf_regs[14] = REG(X14);
dwarf_regs[15] = REG(X15);
dwarf_regs[16] = REG(X16);
dwarf_regs[17] = REG(X17);
dwarf_regs[18] = REG(X18);
dwarf_regs[19] = REG(X19);
dwarf_regs[20] = REG(X20);
dwarf_regs[21] = REG(X21);
dwarf_regs[22] = REG(X22);
dwarf_regs[23] = REG(X23);
dwarf_regs[24] = REG(X24);
dwarf_regs[25] = REG(X25);
dwarf_regs[26] = REG(X26);
dwarf_regs[27] = REG(X27);
dwarf_regs[28] = REG(X28);
dwarf_regs[29] = REG(X29);
dwarf_regs[30] = REG(LR);
dwarf_regs[31] = REG(SP);
if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX,
dwarf_regs))
return false;
dwarf_pc = REG(PC);
dwfl_thread_state_register_pc(thread, dwarf_pc);
return true;
}

View file

@ -0,0 +1,86 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#ifndef REMOTE_UNWIND_LIBUNWIND
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
#endif
int LIBUNWIND__ARCH_REG_ID(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
return PERF_REG_ARM64_X0;
case UNW_AARCH64_X1:
return PERF_REG_ARM64_X1;
case UNW_AARCH64_X2:
return PERF_REG_ARM64_X2;
case UNW_AARCH64_X3:
return PERF_REG_ARM64_X3;
case UNW_AARCH64_X4:
return PERF_REG_ARM64_X4;
case UNW_AARCH64_X5:
return PERF_REG_ARM64_X5;
case UNW_AARCH64_X6:
return PERF_REG_ARM64_X6;
case UNW_AARCH64_X7:
return PERF_REG_ARM64_X7;
case UNW_AARCH64_X8:
return PERF_REG_ARM64_X8;
case UNW_AARCH64_X9:
return PERF_REG_ARM64_X9;
case UNW_AARCH64_X10:
return PERF_REG_ARM64_X10;
case UNW_AARCH64_X11:
return PERF_REG_ARM64_X11;
case UNW_AARCH64_X12:
return PERF_REG_ARM64_X12;
case UNW_AARCH64_X13:
return PERF_REG_ARM64_X13;
case UNW_AARCH64_X14:
return PERF_REG_ARM64_X14;
case UNW_AARCH64_X15:
return PERF_REG_ARM64_X15;
case UNW_AARCH64_X16:
return PERF_REG_ARM64_X16;
case UNW_AARCH64_X17:
return PERF_REG_ARM64_X17;
case UNW_AARCH64_X18:
return PERF_REG_ARM64_X18;
case UNW_AARCH64_X19:
return PERF_REG_ARM64_X19;
case UNW_AARCH64_X20:
return PERF_REG_ARM64_X20;
case UNW_AARCH64_X21:
return PERF_REG_ARM64_X21;
case UNW_AARCH64_X22:
return PERF_REG_ARM64_X22;
case UNW_AARCH64_X23:
return PERF_REG_ARM64_X23;
case UNW_AARCH64_X24:
return PERF_REG_ARM64_X24;
case UNW_AARCH64_X25:
return PERF_REG_ARM64_X25;
case UNW_AARCH64_X26:
return PERF_REG_ARM64_X26;
case UNW_AARCH64_X27:
return PERF_REG_ARM64_X27;
case UNW_AARCH64_X28:
return PERF_REG_ARM64_X28;
case UNW_AARCH64_X29:
return PERF_REG_ARM64_X29;
case UNW_AARCH64_X30:
return PERF_REG_ARM64_LR;
case UNW_AARCH64_SP:
return PERF_REG_ARM64_SP;
case UNW_AARCH64_PC:
return PERF_REG_ARM64_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}

202
arch/common.c Normal file
View file

@ -0,0 +1,202 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "common.h"
#include "../util/env.h"
#include "../util/util.h"
#include "../util/debug.h"
const char *const arm_triplets[] = {
"arm-eabi-",
"arm-linux-androideabi-",
"arm-unknown-linux-",
"arm-unknown-linux-gnu-",
"arm-unknown-linux-gnueabi-",
"arm-linux-gnu-",
"arm-linux-gnueabihf-",
"arm-none-eabi-",
NULL
};
const char *const arm64_triplets[] = {
"aarch64-linux-android-",
"aarch64-linux-gnu-",
NULL
};
const char *const powerpc_triplets[] = {
"powerpc-unknown-linux-gnu-",
"powerpc-linux-gnu-",
"powerpc64-unknown-linux-gnu-",
"powerpc64-linux-gnu-",
"powerpc64le-linux-gnu-",
NULL
};
const char *const s390_triplets[] = {
"s390-ibm-linux-",
"s390x-linux-gnu-",
NULL
};
const char *const sh_triplets[] = {
"sh-unknown-linux-gnu-",
"sh64-unknown-linux-gnu-",
"sh-linux-gnu-",
"sh64-linux-gnu-",
NULL
};
const char *const sparc_triplets[] = {
"sparc-unknown-linux-gnu-",
"sparc64-unknown-linux-gnu-",
"sparc64-linux-gnu-",
NULL
};
const char *const x86_triplets[] = {
"x86_64-pc-linux-gnu-",
"x86_64-unknown-linux-gnu-",
"i686-pc-linux-gnu-",
"i586-pc-linux-gnu-",
"i486-pc-linux-gnu-",
"i386-pc-linux-gnu-",
"i686-linux-android-",
"i686-android-linux-",
"x86_64-linux-gnu-",
"i586-linux-gnu-",
NULL
};
const char *const mips_triplets[] = {
"mips-unknown-linux-gnu-",
"mipsel-linux-android-",
"mips-linux-gnu-",
"mips64-linux-gnu-",
"mips64el-linux-gnuabi64-",
"mips64-linux-gnuabi64-",
"mipsel-linux-gnu-",
NULL
};
static bool lookup_path(char *name)
{
bool found = false;
char *path, *tmp = NULL;
char buf[PATH_MAX];
char *env = getenv("PATH");
if (!env)
return false;
env = strdup(env);
if (!env)
return false;
path = strtok_r(env, ":", &tmp);
while (path) {
scnprintf(buf, sizeof(buf), "%s/%s", path, name);
if (access(buf, F_OK) == 0) {
found = true;
break;
}
path = strtok_r(NULL, ":", &tmp);
}
free(env);
return found;
}
static int lookup_triplets(const char *const *triplets, const char *name)
{
int i;
char buf[PATH_MAX];
for (i = 0; triplets[i] != NULL; i++) {
scnprintf(buf, sizeof(buf), "%s%s", triplets[i], name);
if (lookup_path(buf))
return i;
}
return -1;
}
static int perf_env__lookup_binutils_path(struct perf_env *env,
const char *name, const char **path)
{
int idx;
const char *arch = perf_env__arch(env), *cross_env;
const char *const *path_list;
char *buf = NULL;
/*
* We don't need to try to find objdump path for native system.
* Just use default binutils path (e.g.: "objdump").
*/
if (!strcmp(perf_env__arch(NULL), arch))
goto out;
cross_env = getenv("CROSS_COMPILE");
if (cross_env) {
if (asprintf(&buf, "%s%s", cross_env, name) < 0)
goto out_error;
if (buf[0] == '/') {
if (access(buf, F_OK) == 0)
goto out;
goto out_error;
}
if (lookup_path(buf))
goto out;
zfree(&buf);
}
if (!strcmp(arch, "arm"))
path_list = arm_triplets;
else if (!strcmp(arch, "arm64"))
path_list = arm64_triplets;
else if (!strcmp(arch, "powerpc"))
path_list = powerpc_triplets;
else if (!strcmp(arch, "sh"))
path_list = sh_triplets;
else if (!strcmp(arch, "s390"))
path_list = s390_triplets;
else if (!strcmp(arch, "sparc"))
path_list = sparc_triplets;
else if (!strcmp(arch, "x86"))
path_list = x86_triplets;
else if (!strcmp(arch, "mips"))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
goto out_error;
}
idx = lookup_triplets(path_list, name);
if (idx < 0) {
ui__error("Please install %s for %s.\n"
"You can add it to PATH, set CROSS_COMPILE or "
"override the default using --%s.\n",
name, arch, name);
goto out_error;
}
if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
goto out_error;
out:
*path = buf;
return 0;
out_error:
free(buf);
*path = NULL;
return -1;
}
int perf_env__lookup_objdump(struct perf_env *env)
{
/*
* For live mode, env->arch will be NULL and we can use
* the native objdump tool.
*/
if (env->arch == NULL)
return 0;
return perf_env__lookup_binutils_path(env, "objdump", &objdump_path);
}

11
arch/common.h Normal file
View file

@ -0,0 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_COMMON_H
#define ARCH_PERF_COMMON_H
#include "../util/env.h"
extern const char *objdump_path;
int perf_env__lookup_objdump(struct perf_env *env);
#endif /* ARCH_PERF_COMMON_H */

1
arch/mips/Build Normal file
View file

@ -0,0 +1 @@
# empty

1
arch/parisc/Build Normal file
View file

@ -0,0 +1 @@
# empty

Some files were not shown because too many files have changed in this diff Show more